{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:32:24.279370', 'step': 0, 'epoch': 0} {'type': 'pplx', 'content': 21944.183071258598, 'timestamp': '2025-09-10 02:32:24.282927', 'step': 0, 'epoch': 0} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:24.377122', 'step': 0, 'epoch': 1} {'type': 'loss', 'content': 1.0041145086288452, 'timestamp': '2025-09-10 02:32:24.379056', 'step': 1, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:24.447195', 'step': 1, 'epoch': 1} {'type': 'loss', 'content': 1.0089284181594849, 'timestamp': '2025-09-10 02:32:24.449142', 'step': 2, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:24.514510', 'step': 2, 'epoch': 1} {'type': 'loss', 'content': 0.9882441759109497, 'timestamp': '2025-09-10 02:32:24.516272', 'step': 3, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:24.576833', 'step': 3, 'epoch': 1} {'type': 'loss', 'content': 0.9745813608169556, 'timestamp': '2025-09-10 02:32:24.621239', 'step': 4, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:24.704226', 'step': 4, 'epoch': 1} {'type': 'loss', 'content': 0.8602046966552734, 'timestamp': '2025-09-10 02:32:24.706259', 'step': 5, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:24.762530', 'step': 5, 'epoch': 1} {'type': 'loss', 'content': 0.6281158924102783, 'timestamp': '2025-09-10 02:32:24.764425', 'step': 6, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:24.817925', 'step': 6, 'epoch': 1} {'type': 'loss', 'content': 0.7014464139938354, 'timestamp': '2025-09-10 02:32:24.820004', 'step': 7, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:24.875025', 'step': 7, 'epoch': 1} {'type': 'loss', 'content': 0.838533878326416, 'timestamp': '2025-09-10 02:32:24.881295', 'step': 8, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:24.934505', 'step': 8, 'epoch': 1} {'type': 'loss', 'content': 0.4820110499858856, 'timestamp': '2025-09-10 02:32:24.936476', 'step': 9, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:24.997616', 'step': 9, 'epoch': 1} {'type': 'loss', 'content': 0.5526893138885498, 'timestamp': '2025-09-10 02:32:24.999706', 'step': 10, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:25.053222', 'step': 10, 'epoch': 1} {'type': 'loss', 'content': 0.4873015880584717, 'timestamp': '2025-09-10 02:32:25.055428', 'step': 11, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:25.109045', 'step': 11, 'epoch': 1} {'type': 'loss', 'content': 0.5014121532440186, 'timestamp': '2025-09-10 02:32:25.114659', 'step': 12, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:25.166912', 'step': 12, 'epoch': 1} {'type': 'loss', 'content': 0.3116726875305176, 'timestamp': '2025-09-10 02:32:25.168950', 'step': 13, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:25.235871', 'step': 13, 'epoch': 1} {'type': 'loss', 'content': 0.3410688042640686, 'timestamp': '2025-09-10 02:32:25.237758', 'step': 14, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:25.290853', 'step': 14, 'epoch': 1} {'type': 'loss', 'content': 0.36341652274131775, 'timestamp': '2025-09-10 02:32:25.292761', 'step': 15, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:25.347647', 'step': 15, 'epoch': 1} {'type': 'loss', 'content': 0.4025854468345642, 'timestamp': '2025-09-10 02:32:25.353423', 'step': 16, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:25.405719', 'step': 16, 'epoch': 1} {'type': 'loss', 'content': 0.29315581917762756, 'timestamp': '2025-09-10 02:32:25.407721', 'step': 17, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:25.460781', 'step': 17, 'epoch': 1} {'type': 'loss', 'content': 0.3192974328994751, 'timestamp': '2025-09-10 02:32:25.462721', 'step': 18, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:25.531836', 'step': 18, 'epoch': 1} {'type': 'loss', 'content': 0.39407408237457275, 'timestamp': '2025-09-10 02:32:25.533995', 'step': 19, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:25.586794', 'step': 19, 'epoch': 1} {'type': 'loss', 'content': 0.30724984407424927, 'timestamp': '2025-09-10 02:32:25.592581', 'step': 20, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:25.645647', 'step': 20, 'epoch': 1} {'type': 'loss', 'content': 0.46406733989715576, 'timestamp': '2025-09-10 02:32:25.647642', 'step': 21, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:25.700151', 'step': 21, 'epoch': 1} {'type': 'loss', 'content': 0.3323747217655182, 'timestamp': '2025-09-10 02:32:25.702281', 'step': 22, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:25.756499', 'step': 22, 'epoch': 1} {'type': 'loss', 'content': 0.2438022494316101, 'timestamp': '2025-09-10 02:32:25.758662', 'step': 23, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:25.811618', 'step': 23, 'epoch': 1} {'type': 'loss', 'content': 0.3654380142688751, 'timestamp': '2025-09-10 02:32:25.817265', 'step': 24, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:25.870108', 'step': 24, 'epoch': 1} {'type': 'loss', 'content': 0.26729923486709595, 'timestamp': '2025-09-10 02:32:25.872097', 'step': 25, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:32:25.933089', 'step': 25, 'epoch': 1} {'type': 'loss', 'content': 0.29395151138305664, 'timestamp': '2025-09-10 02:32:25.935054', 'step': 26, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:25.988117', 'step': 26, 'epoch': 1} {'type': 'loss', 'content': 0.3954828977584839, 'timestamp': '2025-09-10 02:32:25.990301', 'step': 27, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:26.043328', 'step': 27, 'epoch': 1} {'type': 'loss', 'content': 0.33909183740615845, 'timestamp': '2025-09-10 02:32:26.049091', 'step': 28, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:26.101197', 'step': 28, 'epoch': 1} {'type': 'loss', 'content': 0.17999111115932465, 'timestamp': '2025-09-10 02:32:26.103237', 'step': 29, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:26.156650', 'step': 29, 'epoch': 1} {'type': 'loss', 'content': 0.15281881392002106, 'timestamp': '2025-09-10 02:32:26.158655', 'step': 30, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:26.212020', 'step': 30, 'epoch': 1} {'type': 'loss', 'content': 0.26366767287254333, 'timestamp': '2025-09-10 02:32:26.214086', 'step': 31, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:26.266795', 'step': 31, 'epoch': 1} {'type': 'loss', 'content': 0.23532210290431976, 'timestamp': '2025-09-10 02:32:26.272468', 'step': 32, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:26.330489', 'step': 32, 'epoch': 1} {'type': 'loss', 'content': 0.30194488167762756, 'timestamp': '2025-09-10 02:32:26.332669', 'step': 33, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:26.385848', 'step': 33, 'epoch': 1} {'type': 'loss', 'content': 0.35206761956214905, 'timestamp': '2025-09-10 02:32:26.387834', 'step': 34, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:26.442365', 'step': 34, 'epoch': 1} {'type': 'loss', 'content': 0.34203651547431946, 'timestamp': '2025-09-10 02:32:26.444179', 'step': 35, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:26.496511', 'step': 35, 'epoch': 1} {'type': 'loss', 'content': 0.2401624470949173, 'timestamp': '2025-09-10 02:32:26.502156', 'step': 36, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:26.554518', 'step': 36, 'epoch': 1} {'type': 'loss', 'content': 0.3539428412914276, 'timestamp': '2025-09-10 02:32:26.560338', 'step': 37, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:26.618910', 'step': 37, 'epoch': 1} {'type': 'loss', 'content': 0.22774025797843933, 'timestamp': '2025-09-10 02:32:26.620950', 'step': 38, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:26.675023', 'step': 38, 'epoch': 1} {'type': 'loss', 'content': 0.35983163118362427, 'timestamp': '2025-09-10 02:32:26.677215', 'step': 39, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:26.729978', 'step': 39, 'epoch': 1} {'type': 'loss', 'content': 0.29538431763648987, 'timestamp': '2025-09-10 02:32:26.735739', 'step': 40, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:26.787795', 'step': 40, 'epoch': 1} {'type': 'loss', 'content': 0.20452909171581268, 'timestamp': '2025-09-10 02:32:26.789701', 'step': 41, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:26.845883', 'step': 41, 'epoch': 1} {'type': 'loss', 'content': 0.25356626510620117, 'timestamp': '2025-09-10 02:32:26.847966', 'step': 42, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:26.906564', 'step': 42, 'epoch': 1} {'type': 'loss', 'content': 0.2052638828754425, 'timestamp': '2025-09-10 02:32:26.908474', 'step': 43, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:26.964978', 'step': 43, 'epoch': 1} {'type': 'loss', 'content': 0.30817273259162903, 'timestamp': '2025-09-10 02:32:26.970621', 'step': 44, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:27.023442', 'step': 44, 'epoch': 1} {'type': 'loss', 'content': 0.2737308442592621, 'timestamp': '2025-09-10 02:32:27.025583', 'step': 45, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:27.100490', 'step': 45, 'epoch': 1} {'type': 'loss', 'content': 0.2593870162963867, 'timestamp': '2025-09-10 02:32:27.102518', 'step': 46, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:27.172532', 'step': 46, 'epoch': 1} {'type': 'loss', 'content': 0.2366705685853958, 'timestamp': '2025-09-10 02:32:27.174562', 'step': 47, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:27.264103', 'step': 47, 'epoch': 1} {'type': 'loss', 'content': 0.31218206882476807, 'timestamp': '2025-09-10 02:32:27.269755', 'step': 48, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:27.333084', 'step': 48, 'epoch': 1} {'type': 'loss', 'content': 0.20520715415477753, 'timestamp': '2025-09-10 02:32:27.335107', 'step': 49, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:27.394362', 'step': 49, 'epoch': 1} {'type': 'loss', 'content': 0.2160646617412567, 'timestamp': '2025-09-10 02:32:27.396826', 'step': 50, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:27.459115', 'step': 50, 'epoch': 1} {'type': 'loss', 'content': 0.25812870264053345, 'timestamp': '2025-09-10 02:32:27.461258', 'step': 51, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:27.519646', 'step': 51, 'epoch': 1} {'type': 'loss', 'content': 0.3601733446121216, 'timestamp': '2025-09-10 02:32:27.525540', 'step': 52, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:27.584926', 'step': 52, 'epoch': 1} {'type': 'loss', 'content': 0.23358696699142456, 'timestamp': '2025-09-10 02:32:27.587273', 'step': 53, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:27.665015', 'step': 53, 'epoch': 1} {'type': 'loss', 'content': 0.22360984981060028, 'timestamp': '2025-09-10 02:32:27.666746', 'step': 54, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:27.724870', 'step': 54, 'epoch': 1} {'type': 'loss', 'content': 0.27864909172058105, 'timestamp': '2025-09-10 02:32:27.727168', 'step': 55, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:27.798169', 'step': 55, 'epoch': 1} {'type': 'loss', 'content': 0.15435098111629486, 'timestamp': '2025-09-10 02:32:27.803788', 'step': 56, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:27.859201', 'step': 56, 'epoch': 1} {'type': 'loss', 'content': 0.1748848706483841, 'timestamp': '2025-09-10 02:32:27.861232', 'step': 57, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:27.922017', 'step': 57, 'epoch': 1} {'type': 'loss', 'content': 0.22962087392807007, 'timestamp': '2025-09-10 02:32:27.923914', 'step': 58, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:27.977093', 'step': 58, 'epoch': 1} {'type': 'loss', 'content': 0.2257484793663025, 'timestamp': '2025-09-10 02:32:27.979001', 'step': 59, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:32:28.032127', 'step': 59, 'epoch': 1} {'type': 'loss', 'content': 0.18367883563041687, 'timestamp': '2025-09-10 02:32:28.037702', 'step': 60, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:28.090091', 'step': 60, 'epoch': 1} {'type': 'loss', 'content': 0.1945520043373108, 'timestamp': '2025-09-10 02:32:28.092003', 'step': 61, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:28.145388', 'step': 61, 'epoch': 1} {'type': 'loss', 'content': 0.16591082513332367, 'timestamp': '2025-09-10 02:32:28.147528', 'step': 62, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:28.201484', 'step': 62, 'epoch': 1} {'type': 'loss', 'content': 0.22525735199451447, 'timestamp': '2025-09-10 02:32:28.203538', 'step': 63, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:28.257337', 'step': 63, 'epoch': 1} {'type': 'loss', 'content': 0.28580373525619507, 'timestamp': '2025-09-10 02:32:28.263163', 'step': 64, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:28.316129', 'step': 64, 'epoch': 1} {'type': 'loss', 'content': 0.1737464815378189, 'timestamp': '2025-09-10 02:32:28.318123', 'step': 65, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:28.372247', 'step': 65, 'epoch': 1} {'type': 'loss', 'content': 0.23103010654449463, 'timestamp': '2025-09-10 02:32:28.374345', 'step': 66, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:28.429551', 'step': 66, 'epoch': 1} {'type': 'loss', 'content': 0.2807379364967346, 'timestamp': '2025-09-10 02:32:28.431723', 'step': 67, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:28.493428', 'step': 67, 'epoch': 1} {'type': 'loss', 'content': 0.2211853414773941, 'timestamp': '2025-09-10 02:32:28.499180', 'step': 68, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:28.557530', 'step': 68, 'epoch': 1} {'type': 'loss', 'content': 0.2314707636833191, 'timestamp': '2025-09-10 02:32:28.559717', 'step': 69, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:28.613462', 'step': 69, 'epoch': 1} {'type': 'loss', 'content': 0.24412792921066284, 'timestamp': '2025-09-10 02:32:28.615480', 'step': 70, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:28.668083', 'step': 70, 'epoch': 1} {'type': 'loss', 'content': 0.1777746081352234, 'timestamp': '2025-09-10 02:32:28.670153', 'step': 71, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:28.724034', 'step': 71, 'epoch': 1} {'type': 'loss', 'content': 0.24788722395896912, 'timestamp': '2025-09-10 02:32:28.729785', 'step': 72, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:28.782610', 'step': 72, 'epoch': 1} {'type': 'loss', 'content': 0.20151418447494507, 'timestamp': '2025-09-10 02:32:28.784646', 'step': 73, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:28.838067', 'step': 73, 'epoch': 1} {'type': 'loss', 'content': 0.2374686747789383, 'timestamp': '2025-09-10 02:32:28.840074', 'step': 74, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:28.896393', 'step': 74, 'epoch': 1} {'type': 'loss', 'content': 0.20186230540275574, 'timestamp': '2025-09-10 02:32:28.898375', 'step': 75, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:28.951475', 'step': 75, 'epoch': 1} {'type': 'loss', 'content': 0.35138803720474243, 'timestamp': '2025-09-10 02:32:28.962171', 'step': 76, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:29.015178', 'step': 76, 'epoch': 1} {'type': 'loss', 'content': 0.1593879908323288, 'timestamp': '2025-09-10 02:32:29.017355', 'step': 77, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:29.070814', 'step': 77, 'epoch': 1} {'type': 'loss', 'content': 0.2500762641429901, 'timestamp': '2025-09-10 02:32:29.072912', 'step': 78, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:29.126409', 'step': 78, 'epoch': 1} {'type': 'loss', 'content': 0.1843755543231964, 'timestamp': '2025-09-10 02:32:29.128461', 'step': 79, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:29.181848', 'step': 79, 'epoch': 1} {'type': 'loss', 'content': 0.2612992525100708, 'timestamp': '2025-09-10 02:32:29.187604', 'step': 80, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:29.240321', 'step': 80, 'epoch': 1} {'type': 'loss', 'content': 0.25783857703208923, 'timestamp': '2025-09-10 02:32:29.242127', 'step': 81, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:29.296483', 'step': 81, 'epoch': 1} {'type': 'loss', 'content': 0.19805344939231873, 'timestamp': '2025-09-10 02:32:29.298665', 'step': 82, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:29.355790', 'step': 82, 'epoch': 1} {'type': 'loss', 'content': 0.16169682145118713, 'timestamp': '2025-09-10 02:32:29.358044', 'step': 83, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:29.411476', 'step': 83, 'epoch': 1} {'type': 'loss', 'content': 0.2777283191680908, 'timestamp': '2025-09-10 02:32:29.417359', 'step': 84, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:29.471739', 'step': 84, 'epoch': 1} {'type': 'loss', 'content': 0.17329388856887817, 'timestamp': '2025-09-10 02:32:29.473744', 'step': 85, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:29.527228', 'step': 85, 'epoch': 1} {'type': 'loss', 'content': 0.2038126140832901, 'timestamp': '2025-09-10 02:32:29.529338', 'step': 86, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:29.582857', 'step': 86, 'epoch': 1} {'type': 'loss', 'content': 0.21457429230213165, 'timestamp': '2025-09-10 02:32:29.585136', 'step': 87, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:29.638645', 'step': 87, 'epoch': 1} {'type': 'loss', 'content': 0.24041877686977386, 'timestamp': '2025-09-10 02:32:29.644136', 'step': 88, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:29.697702', 'step': 88, 'epoch': 1} {'type': 'loss', 'content': 0.16534395515918732, 'timestamp': '2025-09-10 02:32:29.699680', 'step': 89, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:29.753141', 'step': 89, 'epoch': 1} {'type': 'loss', 'content': 0.2617025673389435, 'timestamp': '2025-09-10 02:32:29.754869', 'step': 90, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:29.808489', 'step': 90, 'epoch': 1} {'type': 'loss', 'content': 0.20668882131576538, 'timestamp': '2025-09-10 02:32:29.810121', 'step': 91, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:29.863258', 'step': 91, 'epoch': 1} {'type': 'loss', 'content': 0.18258275091648102, 'timestamp': '2025-09-10 02:32:29.869290', 'step': 92, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:29.924494', 'step': 92, 'epoch': 1} {'type': 'loss', 'content': 0.29622283577919006, 'timestamp': '2025-09-10 02:32:29.926628', 'step': 93, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:32:29.981675', 'step': 93, 'epoch': 1} {'type': 'loss', 'content': 0.22812192142009735, 'timestamp': '2025-09-10 02:32:29.983696', 'step': 94, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:30.038086', 'step': 94, 'epoch': 1} {'type': 'loss', 'content': 0.2046581357717514, 'timestamp': '2025-09-10 02:32:30.040015', 'step': 95, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:30.093337', 'step': 95, 'epoch': 1} {'type': 'loss', 'content': 0.21938204765319824, 'timestamp': '2025-09-10 02:32:30.099586', 'step': 96, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:30.153068', 'step': 96, 'epoch': 1} {'type': 'loss', 'content': 0.18477267026901245, 'timestamp': '2025-09-10 02:32:30.155373', 'step': 97, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:30.208937', 'step': 97, 'epoch': 1} {'type': 'loss', 'content': 0.2192724496126175, 'timestamp': '2025-09-10 02:32:30.211067', 'step': 98, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:30.264483', 'step': 98, 'epoch': 1} {'type': 'loss', 'content': 0.16493730247020721, 'timestamp': '2025-09-10 02:32:30.266539', 'step': 99, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:30.320026', 'step': 99, 'epoch': 1} {'type': 'loss', 'content': 0.23894137144088745, 'timestamp': '2025-09-10 02:32:30.325957', 'step': 100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:30.380885', 'step': 100, 'epoch': 1} {'type': 'loss', 'content': 0.24198560416698456, 'timestamp': '2025-09-10 02:32:30.383011', 'step': 101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:30.437211', 'step': 101, 'epoch': 1} {'type': 'loss', 'content': 0.24142961204051971, 'timestamp': '2025-09-10 02:32:30.439256', 'step': 102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:30.493104', 'step': 102, 'epoch': 1} {'type': 'loss', 'content': 0.32257863879203796, 'timestamp': '2025-09-10 02:32:30.495285', 'step': 103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:30.549228', 'step': 103, 'epoch': 1} {'type': 'loss', 'content': 0.203597754240036, 'timestamp': '2025-09-10 02:32:30.555277', 'step': 104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:30.608753', 'step': 104, 'epoch': 1} {'type': 'loss', 'content': 0.21888211369514465, 'timestamp': '2025-09-10 02:32:30.610963', 'step': 105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:30.664269', 'step': 105, 'epoch': 1} {'type': 'loss', 'content': 0.26044487953186035, 'timestamp': '2025-09-10 02:32:30.666459', 'step': 106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:30.719699', 'step': 106, 'epoch': 1} {'type': 'loss', 'content': 0.32572120428085327, 'timestamp': '2025-09-10 02:32:30.721802', 'step': 107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:30.778693', 'step': 107, 'epoch': 1} {'type': 'loss', 'content': 0.35263967514038086, 'timestamp': '2025-09-10 02:32:30.784794', 'step': 108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:32:30.838725', 'step': 108, 'epoch': 1} {'type': 'loss', 'content': 0.18158814311027527, 'timestamp': '2025-09-10 02:32:30.840619', 'step': 109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:30.898105', 'step': 109, 'epoch': 1} {'type': 'loss', 'content': 0.2702735960483551, 'timestamp': '2025-09-10 02:32:30.900121', 'step': 110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:30.954763', 'step': 110, 'epoch': 1} {'type': 'loss', 'content': 0.14954949915409088, 'timestamp': '2025-09-10 02:32:30.956636', 'step': 111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:31.010004', 'step': 111, 'epoch': 1} {'type': 'loss', 'content': 0.18410463631153107, 'timestamp': '2025-09-10 02:32:31.016018', 'step': 112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:31.070392', 'step': 112, 'epoch': 1} {'type': 'loss', 'content': 0.23767073452472687, 'timestamp': '2025-09-10 02:32:31.072426', 'step': 113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:31.127310', 'step': 113, 'epoch': 1} {'type': 'loss', 'content': 0.35849422216415405, 'timestamp': '2025-09-10 02:32:31.129425', 'step': 114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:31.183898', 'step': 114, 'epoch': 1} {'type': 'loss', 'content': 0.17927177250385284, 'timestamp': '2025-09-10 02:32:31.186939', 'step': 115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:31.241892', 'step': 115, 'epoch': 1} {'type': 'loss', 'content': 0.26307249069213867, 'timestamp': '2025-09-10 02:32:31.247907', 'step': 116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:31.300606', 'step': 116, 'epoch': 1} {'type': 'loss', 'content': 0.19761648774147034, 'timestamp': '2025-09-10 02:32:31.302634', 'step': 117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:31.355803', 'step': 117, 'epoch': 1} {'type': 'loss', 'content': 0.21939942240715027, 'timestamp': '2025-09-10 02:32:31.357781', 'step': 118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:31.411335', 'step': 118, 'epoch': 1} {'type': 'loss', 'content': 0.24071873724460602, 'timestamp': '2025-09-10 02:32:31.413407', 'step': 119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:31.467140', 'step': 119, 'epoch': 1} {'type': 'loss', 'content': 0.2813093066215515, 'timestamp': '2025-09-10 02:32:31.473027', 'step': 120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:31.530881', 'step': 120, 'epoch': 1} {'type': 'loss', 'content': 0.23649735748767853, 'timestamp': '2025-09-10 02:32:31.532959', 'step': 121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:32:31.586898', 'step': 121, 'epoch': 1} {'type': 'loss', 'content': 0.19803555309772491, 'timestamp': '2025-09-10 02:32:31.594404', 'step': 122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:31.650067', 'step': 122, 'epoch': 1} {'type': 'loss', 'content': 0.30755141377449036, 'timestamp': '2025-09-10 02:32:31.652069', 'step': 123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:31.707823', 'step': 123, 'epoch': 1} {'type': 'loss', 'content': 0.203635573387146, 'timestamp': '2025-09-10 02:32:31.713653', 'step': 124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:31.780617', 'step': 124, 'epoch': 1} {'type': 'loss', 'content': 0.24095620214939117, 'timestamp': '2025-09-10 02:32:31.782721', 'step': 125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:31.836072', 'step': 125, 'epoch': 1} {'type': 'loss', 'content': 0.20422136783599854, 'timestamp': '2025-09-10 02:32:31.840025', 'step': 126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:31.897328', 'step': 126, 'epoch': 1} {'type': 'loss', 'content': 0.2278440147638321, 'timestamp': '2025-09-10 02:32:31.899600', 'step': 127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:31.953710', 'step': 127, 'epoch': 1} {'type': 'loss', 'content': 0.4109383821487427, 'timestamp': '2025-09-10 02:32:31.959706', 'step': 128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:32.013094', 'step': 128, 'epoch': 1} {'type': 'loss', 'content': 0.21055220067501068, 'timestamp': '2025-09-10 02:32:32.015150', 'step': 129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:32.068897', 'step': 129, 'epoch': 1} {'type': 'loss', 'content': 0.19363300502300262, 'timestamp': '2025-09-10 02:32:32.070959', 'step': 130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:32.124399', 'step': 130, 'epoch': 1} {'type': 'loss', 'content': 0.16793887317180634, 'timestamp': '2025-09-10 02:32:32.126074', 'step': 131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:32.179145', 'step': 131, 'epoch': 1} {'type': 'loss', 'content': 0.1983814239501953, 'timestamp': '2025-09-10 02:32:32.184738', 'step': 132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:32.240162', 'step': 132, 'epoch': 1} {'type': 'loss', 'content': 0.2487863302230835, 'timestamp': '2025-09-10 02:32:32.242073', 'step': 133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:32.295119', 'step': 133, 'epoch': 1} {'type': 'loss', 'content': 0.29268085956573486, 'timestamp': '2025-09-10 02:32:32.297116', 'step': 134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:32.351905', 'step': 134, 'epoch': 1} {'type': 'loss', 'content': 0.17902137339115143, 'timestamp': '2025-09-10 02:32:32.354016', 'step': 135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:32.409474', 'step': 135, 'epoch': 1} {'type': 'loss', 'content': 0.20046348869800568, 'timestamp': '2025-09-10 02:32:32.415559', 'step': 136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:32.469086', 'step': 136, 'epoch': 1} {'type': 'loss', 'content': 0.2633805274963379, 'timestamp': '2025-09-10 02:32:32.471312', 'step': 137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:32.526629', 'step': 137, 'epoch': 1} {'type': 'loss', 'content': 0.21536464989185333, 'timestamp': '2025-09-10 02:32:32.528689', 'step': 138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:32.583437', 'step': 138, 'epoch': 1} {'type': 'loss', 'content': 0.2185312807559967, 'timestamp': '2025-09-10 02:32:32.585471', 'step': 139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:32.639902', 'step': 139, 'epoch': 1} {'type': 'loss', 'content': 0.27900078892707825, 'timestamp': '2025-09-10 02:32:32.646059', 'step': 140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:32.698691', 'step': 140, 'epoch': 1} {'type': 'loss', 'content': 0.22878415882587433, 'timestamp': '2025-09-10 02:32:32.700963', 'step': 141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:32.755093', 'step': 141, 'epoch': 1} {'type': 'loss', 'content': 0.187476247549057, 'timestamp': '2025-09-10 02:32:32.757121', 'step': 142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:32.812827', 'step': 142, 'epoch': 1} {'type': 'loss', 'content': 0.22268590331077576, 'timestamp': '2025-09-10 02:32:32.814845', 'step': 143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:32.869422', 'step': 143, 'epoch': 1} {'type': 'loss', 'content': 0.29363417625427246, 'timestamp': '2025-09-10 02:32:32.875414', 'step': 144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:32.929051', 'step': 144, 'epoch': 1} {'type': 'loss', 'content': 0.20912876725196838, 'timestamp': '2025-09-10 02:32:32.931051', 'step': 145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:32.984356', 'step': 145, 'epoch': 1} {'type': 'loss', 'content': 0.2785249650478363, 'timestamp': '2025-09-10 02:32:32.986288', 'step': 146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:33.039798', 'step': 146, 'epoch': 1} {'type': 'loss', 'content': 0.26242169737815857, 'timestamp': '2025-09-10 02:32:33.041846', 'step': 147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:33.095288', 'step': 147, 'epoch': 1} {'type': 'loss', 'content': 0.2180704027414322, 'timestamp': '2025-09-10 02:32:33.100968', 'step': 148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:32:33.155031', 'step': 148, 'epoch': 1} {'type': 'loss', 'content': 0.1339309960603714, 'timestamp': '2025-09-10 02:32:33.157054', 'step': 149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:33.212446', 'step': 149, 'epoch': 1} {'type': 'loss', 'content': 0.2593352794647217, 'timestamp': '2025-09-10 02:32:33.214463', 'step': 150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:33.268765', 'step': 150, 'epoch': 1} {'type': 'loss', 'content': 0.16302672028541565, 'timestamp': '2025-09-10 02:32:33.270795', 'step': 151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:33.323820', 'step': 151, 'epoch': 1} {'type': 'loss', 'content': 0.17848609387874603, 'timestamp': '2025-09-10 02:32:33.329832', 'step': 152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:33.384206', 'step': 152, 'epoch': 1} {'type': 'loss', 'content': 0.273547887802124, 'timestamp': '2025-09-10 02:32:33.386330', 'step': 153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:33.439729', 'step': 153, 'epoch': 1} {'type': 'loss', 'content': 0.29128435254096985, 'timestamp': '2025-09-10 02:32:33.441746', 'step': 154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:32:33.508631', 'step': 154, 'epoch': 1} {'type': 'loss', 'content': 0.34222760796546936, 'timestamp': '2025-09-10 02:32:33.510820', 'step': 155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:33.565433', 'step': 155, 'epoch': 1} {'type': 'loss', 'content': 0.23371173441410065, 'timestamp': '2025-09-10 02:32:33.571645', 'step': 156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:33.625345', 'step': 156, 'epoch': 1} {'type': 'loss', 'content': 0.16984544694423676, 'timestamp': '2025-09-10 02:32:33.627587', 'step': 157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:33.681907', 'step': 157, 'epoch': 1} {'type': 'loss', 'content': 0.25136902928352356, 'timestamp': '2025-09-10 02:32:33.683954', 'step': 158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:33.737332', 'step': 158, 'epoch': 1} {'type': 'loss', 'content': 0.18037809431552887, 'timestamp': '2025-09-10 02:32:33.739479', 'step': 159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:33.792672', 'step': 159, 'epoch': 1} {'type': 'loss', 'content': 0.23994353413581848, 'timestamp': '2025-09-10 02:32:33.798578', 'step': 160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:33.852282', 'step': 160, 'epoch': 1} {'type': 'loss', 'content': 0.23948419094085693, 'timestamp': '2025-09-10 02:32:33.854116', 'step': 161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:33.908722', 'step': 161, 'epoch': 1} {'type': 'loss', 'content': 0.34469926357269287, 'timestamp': '2025-09-10 02:32:33.910813', 'step': 162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:33.965154', 'step': 162, 'epoch': 1} {'type': 'loss', 'content': 0.2080046683549881, 'timestamp': '2025-09-10 02:32:33.967159', 'step': 163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:34.021433', 'step': 163, 'epoch': 1} {'type': 'loss', 'content': 0.1705811321735382, 'timestamp': '2025-09-10 02:32:34.027502', 'step': 164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:34.083433', 'step': 164, 'epoch': 1} {'type': 'loss', 'content': 0.28041282296180725, 'timestamp': '2025-09-10 02:32:34.090001', 'step': 165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:34.146461', 'step': 165, 'epoch': 1} {'type': 'loss', 'content': 0.16491025686264038, 'timestamp': '2025-09-10 02:32:34.148465', 'step': 166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:34.202820', 'step': 166, 'epoch': 1} {'type': 'loss', 'content': 0.13005243241786957, 'timestamp': '2025-09-10 02:32:34.204844', 'step': 167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:34.260799', 'step': 167, 'epoch': 1} {'type': 'loss', 'content': 0.23098041117191315, 'timestamp': '2025-09-10 02:32:34.267086', 'step': 168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:34.321612', 'step': 168, 'epoch': 1} {'type': 'loss', 'content': 0.14518006145954132, 'timestamp': '2025-09-10 02:32:34.323801', 'step': 169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:34.377488', 'step': 169, 'epoch': 1} {'type': 'loss', 'content': 0.1821010261774063, 'timestamp': '2025-09-10 02:32:34.379644', 'step': 170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:34.436883', 'step': 170, 'epoch': 1} {'type': 'loss', 'content': 0.17955981194972992, 'timestamp': '2025-09-10 02:32:34.439101', 'step': 171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:34.493846', 'step': 171, 'epoch': 1} {'type': 'loss', 'content': 0.30723807215690613, 'timestamp': '2025-09-10 02:32:34.500008', 'step': 172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:34.553100', 'step': 172, 'epoch': 1} {'type': 'loss', 'content': 0.27865612506866455, 'timestamp': '2025-09-10 02:32:34.556000', 'step': 173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:34.610293', 'step': 173, 'epoch': 1} {'type': 'loss', 'content': 0.2527414560317993, 'timestamp': '2025-09-10 02:32:34.612486', 'step': 174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:34.668121', 'step': 174, 'epoch': 1} {'type': 'loss', 'content': 0.27692094445228577, 'timestamp': '2025-09-10 02:32:34.670174', 'step': 175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:34.725002', 'step': 175, 'epoch': 1} {'type': 'loss', 'content': 0.27577200531959534, 'timestamp': '2025-09-10 02:32:34.731082', 'step': 176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:34.784210', 'step': 176, 'epoch': 1} {'type': 'loss', 'content': 0.2671387195587158, 'timestamp': '2025-09-10 02:32:34.786537', 'step': 177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:34.840504', 'step': 177, 'epoch': 1} {'type': 'loss', 'content': 0.16882698237895966, 'timestamp': '2025-09-10 02:32:34.842565', 'step': 178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:34.895822', 'step': 178, 'epoch': 1} {'type': 'loss', 'content': 0.23094068467617035, 'timestamp': '2025-09-10 02:32:34.897875', 'step': 179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:34.951133', 'step': 179, 'epoch': 1} {'type': 'loss', 'content': 0.18321341276168823, 'timestamp': '2025-09-10 02:32:34.957034', 'step': 180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:32:35.009764', 'step': 180, 'epoch': 1} {'type': 'loss', 'content': 0.2559264898300171, 'timestamp': '2025-09-10 02:32:35.012051', 'step': 181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:35.066430', 'step': 181, 'epoch': 1} {'type': 'loss', 'content': 0.17034415900707245, 'timestamp': '2025-09-10 02:32:35.068818', 'step': 182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:35.123510', 'step': 182, 'epoch': 1} {'type': 'loss', 'content': 0.23187728226184845, 'timestamp': '2025-09-10 02:32:35.138865', 'step': 183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:35.207646', 'step': 183, 'epoch': 1} {'type': 'loss', 'content': 0.16878771781921387, 'timestamp': '2025-09-10 02:32:35.230840', 'step': 184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:32:35.294282', 'step': 184, 'epoch': 1} {'type': 'loss', 'content': 0.20361101627349854, 'timestamp': '2025-09-10 02:32:35.307391', 'step': 185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:35.377658', 'step': 185, 'epoch': 1} {'type': 'loss', 'content': 0.14079107344150543, 'timestamp': '2025-09-10 02:32:35.400122', 'step': 186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:35.465979', 'step': 186, 'epoch': 1} {'type': 'loss', 'content': 0.19392062723636627, 'timestamp': '2025-09-10 02:32:35.484497', 'step': 187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:35.555064', 'step': 187, 'epoch': 1} {'type': 'loss', 'content': 0.20113550126552582, 'timestamp': '2025-09-10 02:32:35.565004', 'step': 188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:32:35.627878', 'step': 188, 'epoch': 1} {'type': 'loss', 'content': 0.25227561593055725, 'timestamp': '2025-09-10 02:32:35.634140', 'step': 189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:35.690155', 'step': 189, 'epoch': 1} {'type': 'loss', 'content': 0.33267390727996826, 'timestamp': '2025-09-10 02:32:35.694020', 'step': 190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:35.752799', 'step': 190, 'epoch': 1} {'type': 'loss', 'content': 0.15574084222316742, 'timestamp': '2025-09-10 02:32:35.759558', 'step': 191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:35.815344', 'step': 191, 'epoch': 1} {'type': 'loss', 'content': 0.34467393159866333, 'timestamp': '2025-09-10 02:32:35.825586', 'step': 192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:35.881142', 'step': 192, 'epoch': 1} {'type': 'loss', 'content': 0.23968566954135895, 'timestamp': '2025-09-10 02:32:35.886222', 'step': 193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:35.943441', 'step': 193, 'epoch': 1} {'type': 'loss', 'content': 0.25149163603782654, 'timestamp': '2025-09-10 02:32:35.961196', 'step': 194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:36.025414', 'step': 194, 'epoch': 1} {'type': 'loss', 'content': 0.1950308233499527, 'timestamp': '2025-09-10 02:32:36.034564', 'step': 195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:36.092350', 'step': 195, 'epoch': 1} {'type': 'loss', 'content': 0.22761055827140808, 'timestamp': '2025-09-10 02:32:36.102705', 'step': 196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:36.168207', 'step': 196, 'epoch': 1} {'type': 'loss', 'content': 0.20356859266757965, 'timestamp': '2025-09-10 02:32:36.172658', 'step': 197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:36.229084', 'step': 197, 'epoch': 1} {'type': 'loss', 'content': 0.3264044523239136, 'timestamp': '2025-09-10 02:32:36.240293', 'step': 198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:36.300750', 'step': 198, 'epoch': 1} {'type': 'loss', 'content': 0.15476201474666595, 'timestamp': '2025-09-10 02:32:36.307043', 'step': 199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:36.375011', 'step': 199, 'epoch': 1} {'type': 'loss', 'content': 0.15208284556865692, 'timestamp': '2025-09-10 02:32:36.385425', 'step': 200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:36.442982', 'step': 200, 'epoch': 1} {'type': 'loss', 'content': 0.3419104218482971, 'timestamp': '2025-09-10 02:32:36.447075', 'step': 201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:36.499956', 'step': 201, 'epoch': 1} {'type': 'loss', 'content': 0.20106355845928192, 'timestamp': '2025-09-10 02:32:36.501891', 'step': 202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:36.555349', 'step': 202, 'epoch': 1} {'type': 'loss', 'content': 0.1463121473789215, 'timestamp': '2025-09-10 02:32:36.557412', 'step': 203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:36.610062', 'step': 203, 'epoch': 1} {'type': 'loss', 'content': 0.2636989951133728, 'timestamp': '2025-09-10 02:32:36.615876', 'step': 204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:36.667907', 'step': 204, 'epoch': 1} {'type': 'loss', 'content': 0.34823158383369446, 'timestamp': '2025-09-10 02:32:36.670108', 'step': 205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:36.722547', 'step': 205, 'epoch': 1} {'type': 'loss', 'content': 0.15631790459156036, 'timestamp': '2025-09-10 02:32:36.724644', 'step': 206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:36.778904', 'step': 206, 'epoch': 1} {'type': 'loss', 'content': 0.19695712625980377, 'timestamp': '2025-09-10 02:32:36.780943', 'step': 207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:36.833855', 'step': 207, 'epoch': 1} {'type': 'loss', 'content': 0.18043570220470428, 'timestamp': '2025-09-10 02:32:36.839647', 'step': 208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:36.892442', 'step': 208, 'epoch': 1} {'type': 'loss', 'content': 0.20610688626766205, 'timestamp': '2025-09-10 02:32:36.894470', 'step': 209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:36.947884', 'step': 209, 'epoch': 1} {'type': 'loss', 'content': 0.35081976652145386, 'timestamp': '2025-09-10 02:32:36.949701', 'step': 210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:37.002339', 'step': 210, 'epoch': 1} {'type': 'loss', 'content': 0.15562184154987335, 'timestamp': '2025-09-10 02:32:37.004175', 'step': 211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:37.056969', 'step': 211, 'epoch': 1} {'type': 'loss', 'content': 0.2525159418582916, 'timestamp': '2025-09-10 02:32:37.062582', 'step': 212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:37.114049', 'step': 212, 'epoch': 1} {'type': 'loss', 'content': 0.16666977107524872, 'timestamp': '2025-09-10 02:32:37.116044', 'step': 213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:37.168520', 'step': 213, 'epoch': 1} {'type': 'loss', 'content': 0.2616753876209259, 'timestamp': '2025-09-10 02:32:37.170633', 'step': 214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:37.223131', 'step': 214, 'epoch': 1} {'type': 'loss', 'content': 0.1793624758720398, 'timestamp': '2025-09-10 02:32:37.224993', 'step': 215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:37.277823', 'step': 215, 'epoch': 1} {'type': 'loss', 'content': 0.30556342005729675, 'timestamp': '2025-09-10 02:32:37.283526', 'step': 216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:37.336318', 'step': 216, 'epoch': 1} {'type': 'loss', 'content': 0.20936620235443115, 'timestamp': '2025-09-10 02:32:37.338291', 'step': 217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:37.390773', 'step': 217, 'epoch': 1} {'type': 'loss', 'content': 0.2139546424150467, 'timestamp': '2025-09-10 02:32:37.392623', 'step': 218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:37.445687', 'step': 218, 'epoch': 1} {'type': 'loss', 'content': 0.32253700494766235, 'timestamp': '2025-09-10 02:32:37.447635', 'step': 219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:37.500737', 'step': 219, 'epoch': 1} {'type': 'loss', 'content': 0.26913216710090637, 'timestamp': '2025-09-10 02:32:37.506336', 'step': 220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:37.558909', 'step': 220, 'epoch': 1} {'type': 'loss', 'content': 0.2108270227909088, 'timestamp': '2025-09-10 02:32:37.560888', 'step': 221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:37.613708', 'step': 221, 'epoch': 1} {'type': 'loss', 'content': 0.12853169441223145, 'timestamp': '2025-09-10 02:32:37.615577', 'step': 222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:37.668817', 'step': 222, 'epoch': 1} {'type': 'loss', 'content': 0.22631746530532837, 'timestamp': '2025-09-10 02:32:37.670724', 'step': 223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:37.723502', 'step': 223, 'epoch': 1} {'type': 'loss', 'content': 0.2135400027036667, 'timestamp': '2025-09-10 02:32:37.729087', 'step': 224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:37.781967', 'step': 224, 'epoch': 1} {'type': 'loss', 'content': 0.193584606051445, 'timestamp': '2025-09-10 02:32:37.783950', 'step': 225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:37.837320', 'step': 225, 'epoch': 1} {'type': 'loss', 'content': 0.2769329249858856, 'timestamp': '2025-09-10 02:32:37.839365', 'step': 226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:37.892245', 'step': 226, 'epoch': 1} {'type': 'loss', 'content': 0.23541676998138428, 'timestamp': '2025-09-10 02:32:37.894369', 'step': 227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:37.947678', 'step': 227, 'epoch': 1} {'type': 'loss', 'content': 0.19655534625053406, 'timestamp': '2025-09-10 02:32:37.953324', 'step': 228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:38.006176', 'step': 228, 'epoch': 1} {'type': 'loss', 'content': 0.19038799405097961, 'timestamp': '2025-09-10 02:32:38.008164', 'step': 229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:38.062088', 'step': 229, 'epoch': 1} {'type': 'loss', 'content': 0.2740563154220581, 'timestamp': '2025-09-10 02:32:38.064026', 'step': 230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:38.116731', 'step': 230, 'epoch': 1} {'type': 'loss', 'content': 0.18271687626838684, 'timestamp': '2025-09-10 02:32:38.118661', 'step': 231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:38.171784', 'step': 231, 'epoch': 1} {'type': 'loss', 'content': 0.2895713746547699, 'timestamp': '2025-09-10 02:32:38.177412', 'step': 232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:38.229576', 'step': 232, 'epoch': 1} {'type': 'loss', 'content': 0.2362276315689087, 'timestamp': '2025-09-10 02:32:38.231379', 'step': 233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:38.284330', 'step': 233, 'epoch': 1} {'type': 'loss', 'content': 0.1827460676431656, 'timestamp': '2025-09-10 02:32:38.286137', 'step': 234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:38.339157', 'step': 234, 'epoch': 1} {'type': 'loss', 'content': 0.26123175024986267, 'timestamp': '2025-09-10 02:32:38.341059', 'step': 235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:38.396788', 'step': 235, 'epoch': 1} {'type': 'loss', 'content': 0.14993634819984436, 'timestamp': '2025-09-10 02:32:38.402605', 'step': 236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:38.455499', 'step': 236, 'epoch': 1} {'type': 'loss', 'content': 0.25096821784973145, 'timestamp': '2025-09-10 02:32:38.457469', 'step': 237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:38.510299', 'step': 237, 'epoch': 1} {'type': 'loss', 'content': 0.2689826488494873, 'timestamp': '2025-09-10 02:32:38.512282', 'step': 238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:38.565439', 'step': 238, 'epoch': 1} {'type': 'loss', 'content': 0.1696462631225586, 'timestamp': '2025-09-10 02:32:38.567319', 'step': 239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:38.620304', 'step': 239, 'epoch': 1} {'type': 'loss', 'content': 0.21168534457683563, 'timestamp': '2025-09-10 02:32:38.626031', 'step': 240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:38.678889', 'step': 240, 'epoch': 1} {'type': 'loss', 'content': 0.2292841076850891, 'timestamp': '2025-09-10 02:32:38.680856', 'step': 241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:38.733956', 'step': 241, 'epoch': 1} {'type': 'loss', 'content': 0.2059919387102127, 'timestamp': '2025-09-10 02:32:38.736092', 'step': 242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:38.790317', 'step': 242, 'epoch': 1} {'type': 'loss', 'content': 0.3775722086429596, 'timestamp': '2025-09-10 02:32:38.792527', 'step': 243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:38.845271', 'step': 243, 'epoch': 1} {'type': 'loss', 'content': 0.22256137430667877, 'timestamp': '2025-09-10 02:32:38.851173', 'step': 244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:38.904252', 'step': 244, 'epoch': 1} {'type': 'loss', 'content': 0.2857944965362549, 'timestamp': '2025-09-10 02:32:38.906315', 'step': 245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:38.959691', 'step': 245, 'epoch': 1} {'type': 'loss', 'content': 0.1847964972257614, 'timestamp': '2025-09-10 02:32:38.961548', 'step': 246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:39.015064', 'step': 246, 'epoch': 1} {'type': 'loss', 'content': 0.16493086516857147, 'timestamp': '2025-09-10 02:32:39.017087', 'step': 247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:39.070247', 'step': 247, 'epoch': 1} {'type': 'loss', 'content': 0.138169065117836, 'timestamp': '2025-09-10 02:32:39.075984', 'step': 248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:39.128525', 'step': 248, 'epoch': 1} {'type': 'loss', 'content': 0.27102306485176086, 'timestamp': '2025-09-10 02:32:39.130189', 'step': 249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:39.182692', 'step': 249, 'epoch': 1} {'type': 'loss', 'content': 0.21834206581115723, 'timestamp': '2025-09-10 02:32:39.184537', 'step': 250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:39.237667', 'step': 250, 'epoch': 1} {'type': 'loss', 'content': 0.20262031257152557, 'timestamp': '2025-09-10 02:32:39.239582', 'step': 251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:39.293067', 'step': 251, 'epoch': 1} {'type': 'loss', 'content': 0.2053125947713852, 'timestamp': '2025-09-10 02:32:39.299439', 'step': 252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:39.353577', 'step': 252, 'epoch': 1} {'type': 'loss', 'content': 0.228641539812088, 'timestamp': '2025-09-10 02:32:39.355790', 'step': 253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:39.411258', 'step': 253, 'epoch': 1} {'type': 'loss', 'content': 0.20029200613498688, 'timestamp': '2025-09-10 02:32:39.413381', 'step': 254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:39.468746', 'step': 254, 'epoch': 1} {'type': 'loss', 'content': 0.12926700711250305, 'timestamp': '2025-09-10 02:32:39.470847', 'step': 255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:39.525059', 'step': 255, 'epoch': 1} {'type': 'loss', 'content': 0.15406258404254913, 'timestamp': '2025-09-10 02:32:39.531131', 'step': 256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:39.583675', 'step': 256, 'epoch': 1} {'type': 'loss', 'content': 0.2562084496021271, 'timestamp': '2025-09-10 02:32:39.585815', 'step': 257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:39.640623', 'step': 257, 'epoch': 1} {'type': 'loss', 'content': 0.1500840038061142, 'timestamp': '2025-09-10 02:32:39.642874', 'step': 258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:39.696706', 'step': 258, 'epoch': 1} {'type': 'loss', 'content': 0.131039559841156, 'timestamp': '2025-09-10 02:32:39.698576', 'step': 259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:39.752209', 'step': 259, 'epoch': 1} {'type': 'loss', 'content': 0.24963144958019257, 'timestamp': '2025-09-10 02:32:39.758105', 'step': 260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:39.810710', 'step': 260, 'epoch': 1} {'type': 'loss', 'content': 0.16896982491016388, 'timestamp': '2025-09-10 02:32:39.812724', 'step': 261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:39.866519', 'step': 261, 'epoch': 1} {'type': 'loss', 'content': 0.17015883326530457, 'timestamp': '2025-09-10 02:32:39.868749', 'step': 262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:39.922368', 'step': 262, 'epoch': 1} {'type': 'loss', 'content': 0.21799170970916748, 'timestamp': '2025-09-10 02:32:39.924464', 'step': 263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:32:39.999059', 'step': 263, 'epoch': 1} {'type': 'loss', 'content': 0.24299080669879913, 'timestamp': '2025-09-10 02:32:40.005130', 'step': 264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:40.058406', 'step': 264, 'epoch': 1} {'type': 'loss', 'content': 0.23028156161308289, 'timestamp': '2025-09-10 02:32:40.060309', 'step': 265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:40.114267', 'step': 265, 'epoch': 1} {'type': 'loss', 'content': 0.15893720090389252, 'timestamp': '2025-09-10 02:32:40.116293', 'step': 266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:40.172355', 'step': 266, 'epoch': 1} {'type': 'loss', 'content': 0.1361294984817505, 'timestamp': '2025-09-10 02:32:40.174432', 'step': 267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:40.227768', 'step': 267, 'epoch': 1} {'type': 'loss', 'content': 0.2768157720565796, 'timestamp': '2025-09-10 02:32:40.233547', 'step': 268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:40.286787', 'step': 268, 'epoch': 1} {'type': 'loss', 'content': 0.18262462317943573, 'timestamp': '2025-09-10 02:32:40.288776', 'step': 269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:40.341724', 'step': 269, 'epoch': 1} {'type': 'loss', 'content': 0.16673381626605988, 'timestamp': '2025-09-10 02:32:40.343344', 'step': 270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:40.396062', 'step': 270, 'epoch': 1} {'type': 'loss', 'content': 0.2536642849445343, 'timestamp': '2025-09-10 02:32:40.397982', 'step': 271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:40.450485', 'step': 271, 'epoch': 1} {'type': 'loss', 'content': 0.23778976500034332, 'timestamp': '2025-09-10 02:32:40.456682', 'step': 272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:40.512481', 'step': 272, 'epoch': 1} {'type': 'loss', 'content': 0.14074373245239258, 'timestamp': '2025-09-10 02:32:40.514625', 'step': 273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:40.570081', 'step': 273, 'epoch': 1} {'type': 'loss', 'content': 0.20750093460083008, 'timestamp': '2025-09-10 02:32:40.572097', 'step': 274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:40.626740', 'step': 274, 'epoch': 1} {'type': 'loss', 'content': 0.18144077062606812, 'timestamp': '2025-09-10 02:32:40.628806', 'step': 275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:40.682904', 'step': 275, 'epoch': 1} {'type': 'loss', 'content': 0.1529066264629364, 'timestamp': '2025-09-10 02:32:40.688980', 'step': 276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:40.742288', 'step': 276, 'epoch': 1} {'type': 'loss', 'content': 0.14843431115150452, 'timestamp': '2025-09-10 02:32:40.744313', 'step': 277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:40.797997', 'step': 277, 'epoch': 1} {'type': 'loss', 'content': 0.2878422141075134, 'timestamp': '2025-09-10 02:32:40.799829', 'step': 278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:40.853968', 'step': 278, 'epoch': 1} {'type': 'loss', 'content': 0.19720129668712616, 'timestamp': '2025-09-10 02:32:40.855887', 'step': 279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:40.909999', 'step': 279, 'epoch': 1} {'type': 'loss', 'content': 0.21609193086624146, 'timestamp': '2025-09-10 02:32:40.915866', 'step': 280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:40.969705', 'step': 280, 'epoch': 1} {'type': 'loss', 'content': 0.1796324998140335, 'timestamp': '2025-09-10 02:32:40.971533', 'step': 281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:41.027214', 'step': 281, 'epoch': 1} {'type': 'loss', 'content': 0.17429091036319733, 'timestamp': '2025-09-10 02:32:41.029377', 'step': 282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:41.084118', 'step': 282, 'epoch': 1} {'type': 'loss', 'content': 0.19148218631744385, 'timestamp': '2025-09-10 02:32:41.086190', 'step': 283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:41.139949', 'step': 283, 'epoch': 1} {'type': 'loss', 'content': 0.1872914731502533, 'timestamp': '2025-09-10 02:32:41.146178', 'step': 284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:41.200398', 'step': 284, 'epoch': 1} {'type': 'loss', 'content': 0.2258094996213913, 'timestamp': '2025-09-10 02:32:41.202242', 'step': 285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:41.255868', 'step': 285, 'epoch': 1} {'type': 'loss', 'content': 0.2621079683303833, 'timestamp': '2025-09-10 02:32:41.257815', 'step': 286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:41.312098', 'step': 286, 'epoch': 1} {'type': 'loss', 'content': 0.32313862442970276, 'timestamp': '2025-09-10 02:32:41.314442', 'step': 287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:41.368560', 'step': 287, 'epoch': 1} {'type': 'loss', 'content': 0.33231043815612793, 'timestamp': '2025-09-10 02:32:41.374656', 'step': 288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:41.427858', 'step': 288, 'epoch': 1} {'type': 'loss', 'content': 0.2733962833881378, 'timestamp': '2025-09-10 02:32:41.431019', 'step': 289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:41.485905', 'step': 289, 'epoch': 1} {'type': 'loss', 'content': 0.17515158653259277, 'timestamp': '2025-09-10 02:32:41.487971', 'step': 290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:41.542361', 'step': 290, 'epoch': 1} {'type': 'loss', 'content': 0.17359595000743866, 'timestamp': '2025-09-10 02:32:41.544671', 'step': 291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:41.599636', 'step': 291, 'epoch': 1} {'type': 'loss', 'content': 0.2785312533378601, 'timestamp': '2025-09-10 02:32:41.605540', 'step': 292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:32:41.659695', 'step': 292, 'epoch': 1} {'type': 'loss', 'content': 0.3376835286617279, 'timestamp': '2025-09-10 02:32:41.661606', 'step': 293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:41.715940', 'step': 293, 'epoch': 1} {'type': 'loss', 'content': 0.20759201049804688, 'timestamp': '2025-09-10 02:32:41.717964', 'step': 294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:41.772139', 'step': 294, 'epoch': 1} {'type': 'loss', 'content': 0.1808428317308426, 'timestamp': '2025-09-10 02:32:41.773930', 'step': 295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:41.829110', 'step': 295, 'epoch': 1} {'type': 'loss', 'content': 0.16838005185127258, 'timestamp': '2025-09-10 02:32:41.834998', 'step': 296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:41.889318', 'step': 296, 'epoch': 1} {'type': 'loss', 'content': 0.17465966939926147, 'timestamp': '2025-09-10 02:32:41.891109', 'step': 297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:41.945453', 'step': 297, 'epoch': 1} {'type': 'loss', 'content': 0.13667283952236176, 'timestamp': '2025-09-10 02:32:41.947678', 'step': 298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:42.002022', 'step': 298, 'epoch': 1} {'type': 'loss', 'content': 0.2498626708984375, 'timestamp': '2025-09-10 02:32:42.003940', 'step': 299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:42.058921', 'step': 299, 'epoch': 1} {'type': 'loss', 'content': 0.2092202603816986, 'timestamp': '2025-09-10 02:32:42.065008', 'step': 300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:42.120095', 'step': 300, 'epoch': 1} {'type': 'loss', 'content': 0.2841796278953552, 'timestamp': '2025-09-10 02:32:42.122345', 'step': 301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:42.178080', 'step': 301, 'epoch': 1} {'type': 'loss', 'content': 0.24996306002140045, 'timestamp': '2025-09-10 02:32:42.180144', 'step': 302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:42.235168', 'step': 302, 'epoch': 1} {'type': 'loss', 'content': 0.19040529429912567, 'timestamp': '2025-09-10 02:32:42.236999', 'step': 303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:42.291592', 'step': 303, 'epoch': 1} {'type': 'loss', 'content': 0.20559446513652802, 'timestamp': '2025-09-10 02:32:42.297728', 'step': 304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:32:42.376479', 'step': 304, 'epoch': 1} {'type': 'loss', 'content': 0.22151139378547668, 'timestamp': '2025-09-10 02:32:42.387482', 'step': 305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:42.442482', 'step': 305, 'epoch': 1} {'type': 'loss', 'content': 0.2909967303276062, 'timestamp': '2025-09-10 02:32:42.444287', 'step': 306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:42.498261', 'step': 306, 'epoch': 1} {'type': 'loss', 'content': 0.23213601112365723, 'timestamp': '2025-09-10 02:32:42.500293', 'step': 307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:42.553731', 'step': 307, 'epoch': 1} {'type': 'loss', 'content': 0.1652245819568634, 'timestamp': '2025-09-10 02:32:42.560343', 'step': 308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:42.614364', 'step': 308, 'epoch': 1} {'type': 'loss', 'content': 0.16413530707359314, 'timestamp': '2025-09-10 02:32:42.616671', 'step': 309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:42.672480', 'step': 309, 'epoch': 1} {'type': 'loss', 'content': 0.23107250034809113, 'timestamp': '2025-09-10 02:32:42.674303', 'step': 310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:42.730895', 'step': 310, 'epoch': 1} {'type': 'loss', 'content': 0.20953235030174255, 'timestamp': '2025-09-10 02:32:42.732861', 'step': 311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:42.786126', 'step': 311, 'epoch': 1} {'type': 'loss', 'content': 0.18695034086704254, 'timestamp': '2025-09-10 02:32:42.791968', 'step': 312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:42.844559', 'step': 312, 'epoch': 1} {'type': 'loss', 'content': 0.13835640251636505, 'timestamp': '2025-09-10 02:32:42.846355', 'step': 313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:42.901975', 'step': 313, 'epoch': 1} {'type': 'loss', 'content': 0.20657037198543549, 'timestamp': '2025-09-10 02:32:42.904026', 'step': 314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:42.957939', 'step': 314, 'epoch': 1} {'type': 'loss', 'content': 0.2604976296424866, 'timestamp': '2025-09-10 02:32:42.960050', 'step': 315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:43.014207', 'step': 315, 'epoch': 1} {'type': 'loss', 'content': 0.1515236496925354, 'timestamp': '2025-09-10 02:32:43.020557', 'step': 316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:43.073748', 'step': 316, 'epoch': 1} {'type': 'loss', 'content': 0.1784118413925171, 'timestamp': '2025-09-10 02:32:43.075684', 'step': 317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:43.129465', 'step': 317, 'epoch': 1} {'type': 'loss', 'content': 0.13623057305812836, 'timestamp': '2025-09-10 02:32:43.131484', 'step': 318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:43.185958', 'step': 318, 'epoch': 1} {'type': 'loss', 'content': 0.21501685678958893, 'timestamp': '2025-09-10 02:32:43.187820', 'step': 319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:43.241288', 'step': 319, 'epoch': 1} {'type': 'loss', 'content': 0.25638002157211304, 'timestamp': '2025-09-10 02:32:43.247163', 'step': 320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:43.299892', 'step': 320, 'epoch': 1} {'type': 'loss', 'content': 0.21873223781585693, 'timestamp': '2025-09-10 02:32:43.301695', 'step': 321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:43.355024', 'step': 321, 'epoch': 1} {'type': 'loss', 'content': 0.18386685848236084, 'timestamp': '2025-09-10 02:32:43.356984', 'step': 322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:43.411411', 'step': 322, 'epoch': 1} {'type': 'loss', 'content': 0.21024291217327118, 'timestamp': '2025-09-10 02:32:43.413506', 'step': 323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:43.467698', 'step': 323, 'epoch': 1} {'type': 'loss', 'content': 0.1835528165102005, 'timestamp': '2025-09-10 02:32:43.473607', 'step': 324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:32:43.527593', 'step': 324, 'epoch': 1} {'type': 'loss', 'content': 0.16633234918117523, 'timestamp': '2025-09-10 02:32:43.529708', 'step': 325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:43.584448', 'step': 325, 'epoch': 1} {'type': 'loss', 'content': 0.30357667803764343, 'timestamp': '2025-09-10 02:32:43.586558', 'step': 326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:43.640962', 'step': 326, 'epoch': 1} {'type': 'loss', 'content': 0.1206614226102829, 'timestamp': '2025-09-10 02:32:43.643116', 'step': 327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:43.699050', 'step': 327, 'epoch': 1} {'type': 'loss', 'content': 0.22941388189792633, 'timestamp': '2025-09-10 02:32:43.705842', 'step': 328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:43.761159', 'step': 328, 'epoch': 1} {'type': 'loss', 'content': 0.25328221917152405, 'timestamp': '2025-09-10 02:32:43.763180', 'step': 329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:43.817417', 'step': 329, 'epoch': 1} {'type': 'loss', 'content': 0.168710395693779, 'timestamp': '2025-09-10 02:32:43.819341', 'step': 330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:43.873965', 'step': 330, 'epoch': 1} {'type': 'loss', 'content': 0.17429940402507782, 'timestamp': '2025-09-10 02:32:43.876077', 'step': 331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:43.931464', 'step': 331, 'epoch': 1} {'type': 'loss', 'content': 0.2523728013038635, 'timestamp': '2025-09-10 02:32:43.937707', 'step': 332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:43.991020', 'step': 332, 'epoch': 1} {'type': 'loss', 'content': 0.13942894339561462, 'timestamp': '2025-09-10 02:32:43.992798', 'step': 333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:44.048470', 'step': 333, 'epoch': 1} {'type': 'loss', 'content': 0.2729742228984833, 'timestamp': '2025-09-10 02:32:44.050618', 'step': 334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:44.104170', 'step': 334, 'epoch': 1} {'type': 'loss', 'content': 0.12865875661373138, 'timestamp': '2025-09-10 02:32:44.106100', 'step': 335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:44.161415', 'step': 335, 'epoch': 1} {'type': 'loss', 'content': 0.2015928030014038, 'timestamp': '2025-09-10 02:32:44.167612', 'step': 336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:44.222728', 'step': 336, 'epoch': 1} {'type': 'loss', 'content': 0.20323285460472107, 'timestamp': '2025-09-10 02:32:44.224870', 'step': 337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:44.279764', 'step': 337, 'epoch': 1} {'type': 'loss', 'content': 0.1649916023015976, 'timestamp': '2025-09-10 02:32:44.281879', 'step': 338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:44.337194', 'step': 338, 'epoch': 1} {'type': 'loss', 'content': 0.31262582540512085, 'timestamp': '2025-09-10 02:32:44.339492', 'step': 339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:44.394060', 'step': 339, 'epoch': 1} {'type': 'loss', 'content': 0.2712140679359436, 'timestamp': '2025-09-10 02:32:44.400311', 'step': 340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:44.453830', 'step': 340, 'epoch': 1} {'type': 'loss', 'content': 0.24683773517608643, 'timestamp': '2025-09-10 02:32:44.455811', 'step': 341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:44.511480', 'step': 341, 'epoch': 1} {'type': 'loss', 'content': 0.16876651346683502, 'timestamp': '2025-09-10 02:32:44.513468', 'step': 342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:44.568390', 'step': 342, 'epoch': 1} {'type': 'loss', 'content': 0.21496762335300446, 'timestamp': '2025-09-10 02:32:44.570958', 'step': 343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:44.625325', 'step': 343, 'epoch': 1} {'type': 'loss', 'content': 0.13773781061172485, 'timestamp': '2025-09-10 02:32:44.631637', 'step': 344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:44.685089', 'step': 344, 'epoch': 1} {'type': 'loss', 'content': 0.2718742787837982, 'timestamp': '2025-09-10 02:32:44.687364', 'step': 345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:44.741010', 'step': 345, 'epoch': 1} {'type': 'loss', 'content': 0.2649340331554413, 'timestamp': '2025-09-10 02:32:44.743011', 'step': 346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:44.798409', 'step': 346, 'epoch': 1} {'type': 'loss', 'content': 0.2598666250705719, 'timestamp': '2025-09-10 02:32:44.800543', 'step': 347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:44.854937', 'step': 347, 'epoch': 1} {'type': 'loss', 'content': 0.22440268099308014, 'timestamp': '2025-09-10 02:32:44.861187', 'step': 348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:44.914547', 'step': 348, 'epoch': 1} {'type': 'loss', 'content': 0.1947396695613861, 'timestamp': '2025-09-10 02:32:44.916459', 'step': 349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:44.971867', 'step': 349, 'epoch': 1} {'type': 'loss', 'content': 0.16746091842651367, 'timestamp': '2025-09-10 02:32:44.973703', 'step': 350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:45.028170', 'step': 350, 'epoch': 1} {'type': 'loss', 'content': 0.16889630258083344, 'timestamp': '2025-09-10 02:32:45.030250', 'step': 351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:45.092445', 'step': 351, 'epoch': 1} {'type': 'loss', 'content': 0.19186502695083618, 'timestamp': '2025-09-10 02:32:45.098527', 'step': 352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:45.154390', 'step': 352, 'epoch': 1} {'type': 'loss', 'content': 0.13550116121768951, 'timestamp': '2025-09-10 02:32:45.156451', 'step': 353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:45.211227', 'step': 353, 'epoch': 1} {'type': 'loss', 'content': 0.11983438581228256, 'timestamp': '2025-09-10 02:32:45.212873', 'step': 354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:45.268907', 'step': 354, 'epoch': 1} {'type': 'loss', 'content': 0.19634883105754852, 'timestamp': '2025-09-10 02:32:45.270546', 'step': 355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:45.324651', 'step': 355, 'epoch': 1} {'type': 'loss', 'content': 0.35302647948265076, 'timestamp': '2025-09-10 02:32:45.330926', 'step': 356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:45.385538', 'step': 356, 'epoch': 1} {'type': 'loss', 'content': 0.21528150141239166, 'timestamp': '2025-09-10 02:32:45.387745', 'step': 357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:45.442304', 'step': 357, 'epoch': 1} {'type': 'loss', 'content': 0.08072078973054886, 'timestamp': '2025-09-10 02:32:45.444415', 'step': 358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:45.499221', 'step': 358, 'epoch': 1} {'type': 'loss', 'content': 0.22002415359020233, 'timestamp': '2025-09-10 02:32:45.501307', 'step': 359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:45.555671', 'step': 359, 'epoch': 1} {'type': 'loss', 'content': 0.2179984748363495, 'timestamp': '2025-09-10 02:32:45.561910', 'step': 360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:45.615728', 'step': 360, 'epoch': 1} {'type': 'loss', 'content': 0.16250891983509064, 'timestamp': '2025-09-10 02:32:45.617964', 'step': 361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:45.672423', 'step': 361, 'epoch': 1} {'type': 'loss', 'content': 0.2139461785554886, 'timestamp': '2025-09-10 02:32:45.674308', 'step': 362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:45.727933', 'step': 362, 'epoch': 1} {'type': 'loss', 'content': 0.15961049497127533, 'timestamp': '2025-09-10 02:32:45.729984', 'step': 363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:45.783916', 'step': 363, 'epoch': 1} {'type': 'loss', 'content': 0.185376837849617, 'timestamp': '2025-09-10 02:32:45.789870', 'step': 364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:32:45.844107', 'step': 364, 'epoch': 1} {'type': 'loss', 'content': 0.24627326428890228, 'timestamp': '2025-09-10 02:32:45.846216', 'step': 365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:45.901389', 'step': 365, 'epoch': 1} {'type': 'loss', 'content': 0.19987230002880096, 'timestamp': '2025-09-10 02:32:45.903636', 'step': 366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:45.958716', 'step': 366, 'epoch': 1} {'type': 'loss', 'content': 0.33456942439079285, 'timestamp': '2025-09-10 02:32:45.960758', 'step': 367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:46.015422', 'step': 367, 'epoch': 1} {'type': 'loss', 'content': 0.1311514675617218, 'timestamp': '2025-09-10 02:32:46.021492', 'step': 368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:46.075011', 'step': 368, 'epoch': 1} {'type': 'loss', 'content': 0.22946897149085999, 'timestamp': '2025-09-10 02:32:46.077121', 'step': 369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:46.131164', 'step': 369, 'epoch': 1} {'type': 'loss', 'content': 0.12892025709152222, 'timestamp': '2025-09-10 02:32:46.133155', 'step': 370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:46.189193', 'step': 370, 'epoch': 1} {'type': 'loss', 'content': 0.22484907507896423, 'timestamp': '2025-09-10 02:32:46.191346', 'step': 371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:46.246395', 'step': 371, 'epoch': 1} {'type': 'loss', 'content': 0.2559179663658142, 'timestamp': '2025-09-10 02:32:46.252671', 'step': 372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:32:46.306759', 'step': 372, 'epoch': 1} {'type': 'loss', 'content': 0.14708150923252106, 'timestamp': '2025-09-10 02:32:46.309087', 'step': 373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:46.363524', 'step': 373, 'epoch': 1} {'type': 'loss', 'content': 0.21033383905887604, 'timestamp': '2025-09-10 02:32:46.365698', 'step': 374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:46.422684', 'step': 374, 'epoch': 1} {'type': 'loss', 'content': 0.15010355412960052, 'timestamp': '2025-09-10 02:32:46.424845', 'step': 375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:46.479957', 'step': 375, 'epoch': 1} {'type': 'loss', 'content': 0.1511424481868744, 'timestamp': '2025-09-10 02:32:46.486082', 'step': 376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:46.541680', 'step': 376, 'epoch': 1} {'type': 'loss', 'content': 0.2001281976699829, 'timestamp': '2025-09-10 02:32:46.543540', 'step': 377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:46.598726', 'step': 377, 'epoch': 1} {'type': 'loss', 'content': 0.1658744364976883, 'timestamp': '2025-09-10 02:32:46.600616', 'step': 378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:46.654584', 'step': 378, 'epoch': 1} {'type': 'loss', 'content': 0.2021329253911972, 'timestamp': '2025-09-10 02:32:46.656700', 'step': 379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:46.710804', 'step': 379, 'epoch': 1} {'type': 'loss', 'content': 0.2881085276603699, 'timestamp': '2025-09-10 02:32:46.716673', 'step': 380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:46.769893', 'step': 380, 'epoch': 1} {'type': 'loss', 'content': 0.254569411277771, 'timestamp': '2025-09-10 02:32:46.772273', 'step': 381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:46.826652', 'step': 381, 'epoch': 1} {'type': 'loss', 'content': 0.3278712034225464, 'timestamp': '2025-09-10 02:32:46.828710', 'step': 382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:46.882632', 'step': 382, 'epoch': 1} {'type': 'loss', 'content': 0.16257648169994354, 'timestamp': '2025-09-10 02:32:46.884519', 'step': 383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:46.939013', 'step': 383, 'epoch': 1} {'type': 'loss', 'content': 0.22626708447933197, 'timestamp': '2025-09-10 02:32:46.945210', 'step': 384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:47.000647', 'step': 384, 'epoch': 1} {'type': 'loss', 'content': 0.2002372145652771, 'timestamp': '2025-09-10 02:32:47.002737', 'step': 385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:47.056582', 'step': 385, 'epoch': 1} {'type': 'loss', 'content': 0.17462554574012756, 'timestamp': '2025-09-10 02:32:47.058715', 'step': 386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:47.113743', 'step': 386, 'epoch': 1} {'type': 'loss', 'content': 0.134200781583786, 'timestamp': '2025-09-10 02:32:47.115686', 'step': 387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:32:47.170242', 'step': 387, 'epoch': 1} {'type': 'loss', 'content': 0.26182225346565247, 'timestamp': '2025-09-10 02:32:47.176681', 'step': 388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:47.230816', 'step': 388, 'epoch': 1} {'type': 'loss', 'content': 0.17992766201496124, 'timestamp': '2025-09-10 02:32:47.233024', 'step': 389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:47.287095', 'step': 389, 'epoch': 1} {'type': 'loss', 'content': 0.2125210464000702, 'timestamp': '2025-09-10 02:32:47.289161', 'step': 390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:47.343114', 'step': 390, 'epoch': 1} {'type': 'loss', 'content': 0.2446945160627365, 'timestamp': '2025-09-10 02:32:47.345009', 'step': 391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:47.400672', 'step': 391, 'epoch': 1} {'type': 'loss', 'content': 0.2607359290122986, 'timestamp': '2025-09-10 02:32:47.406751', 'step': 392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:47.460215', 'step': 392, 'epoch': 1} {'type': 'loss', 'content': 0.17982158064842224, 'timestamp': '2025-09-10 02:32:47.462059', 'step': 393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:47.519772', 'step': 393, 'epoch': 1} {'type': 'loss', 'content': 0.13885575532913208, 'timestamp': '2025-09-10 02:32:47.521633', 'step': 394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:47.576728', 'step': 394, 'epoch': 1} {'type': 'loss', 'content': 0.3047522008419037, 'timestamp': '2025-09-10 02:32:47.578889', 'step': 395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:47.633946', 'step': 395, 'epoch': 1} {'type': 'loss', 'content': 0.12275240570306778, 'timestamp': '2025-09-10 02:32:47.639965', 'step': 396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:47.693720', 'step': 396, 'epoch': 1} {'type': 'loss', 'content': 0.13811807334423065, 'timestamp': '2025-09-10 02:32:47.695750', 'step': 397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:47.750866', 'step': 397, 'epoch': 1} {'type': 'loss', 'content': 0.2664540410041809, 'timestamp': '2025-09-10 02:32:47.752773', 'step': 398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:47.808492', 'step': 398, 'epoch': 1} {'type': 'loss', 'content': 0.2167162448167801, 'timestamp': '2025-09-10 02:32:47.810100', 'step': 399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:47.864204', 'step': 399, 'epoch': 1} {'type': 'loss', 'content': 0.14087055623531342, 'timestamp': '2025-09-10 02:32:47.869842', 'step': 400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:47.923797', 'step': 400, 'epoch': 1} {'type': 'loss', 'content': 0.2665315270423889, 'timestamp': '2025-09-10 02:32:47.925466', 'step': 401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:47.979405', 'step': 401, 'epoch': 1} {'type': 'loss', 'content': 0.20004235208034515, 'timestamp': '2025-09-10 02:32:47.981149', 'step': 402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:48.037215', 'step': 402, 'epoch': 1} {'type': 'loss', 'content': 0.17113454639911652, 'timestamp': '2025-09-10 02:32:48.039193', 'step': 403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:48.094130', 'step': 403, 'epoch': 1} {'type': 'loss', 'content': 0.148561030626297, 'timestamp': '2025-09-10 02:32:48.100136', 'step': 404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:48.159067', 'step': 404, 'epoch': 1} {'type': 'loss', 'content': 0.21295927464962006, 'timestamp': '2025-09-10 02:32:48.160907', 'step': 405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:48.215669', 'step': 405, 'epoch': 1} {'type': 'loss', 'content': 0.21022342145442963, 'timestamp': '2025-09-10 02:32:48.217666', 'step': 406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:48.271982', 'step': 406, 'epoch': 1} {'type': 'loss', 'content': 0.15256977081298828, 'timestamp': '2025-09-10 02:32:48.273865', 'step': 407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:48.329000', 'step': 407, 'epoch': 1} {'type': 'loss', 'content': 0.19174502789974213, 'timestamp': '2025-09-10 02:32:48.334497', 'step': 408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:48.387339', 'step': 408, 'epoch': 1} {'type': 'loss', 'content': 0.14120179414749146, 'timestamp': '2025-09-10 02:32:48.388912', 'step': 409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:48.443219', 'step': 409, 'epoch': 1} {'type': 'loss', 'content': 0.23957841098308563, 'timestamp': '2025-09-10 02:32:48.445811', 'step': 410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:48.500326', 'step': 410, 'epoch': 1} {'type': 'loss', 'content': 0.2424345314502716, 'timestamp': '2025-09-10 02:32:48.502165', 'step': 411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:48.557213', 'step': 411, 'epoch': 1} {'type': 'loss', 'content': 0.20357073843479156, 'timestamp': '2025-09-10 02:32:48.563555', 'step': 412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:48.618947', 'step': 412, 'epoch': 1} {'type': 'loss', 'content': 0.20202061533927917, 'timestamp': '2025-09-10 02:32:48.621468', 'step': 413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:48.675653', 'step': 413, 'epoch': 1} {'type': 'loss', 'content': 0.22625739872455597, 'timestamp': '2025-09-10 02:32:48.678248', 'step': 414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:48.732850', 'step': 414, 'epoch': 1} {'type': 'loss', 'content': 0.17955823242664337, 'timestamp': '2025-09-10 02:32:48.734851', 'step': 415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:48.788988', 'step': 415, 'epoch': 1} {'type': 'loss', 'content': 0.1695263832807541, 'timestamp': '2025-09-10 02:32:48.794630', 'step': 416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:48.848492', 'step': 416, 'epoch': 1} {'type': 'loss', 'content': 0.12578018009662628, 'timestamp': '2025-09-10 02:32:48.850226', 'step': 417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:48.903756', 'step': 417, 'epoch': 1} {'type': 'loss', 'content': 0.23624497652053833, 'timestamp': '2025-09-10 02:32:48.905534', 'step': 418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:48.959493', 'step': 418, 'epoch': 1} {'type': 'loss', 'content': 0.12069395929574966, 'timestamp': '2025-09-10 02:32:48.961112', 'step': 419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:49.014870', 'step': 419, 'epoch': 1} {'type': 'loss', 'content': 0.1685531735420227, 'timestamp': '2025-09-10 02:32:49.020774', 'step': 420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:49.073963', 'step': 420, 'epoch': 1} {'type': 'loss', 'content': 0.17230895161628723, 'timestamp': '2025-09-10 02:32:49.075809', 'step': 421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:49.129986', 'step': 421, 'epoch': 1} {'type': 'loss', 'content': 0.22220034897327423, 'timestamp': '2025-09-10 02:32:49.131974', 'step': 422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:49.185416', 'step': 422, 'epoch': 1} {'type': 'loss', 'content': 0.26635217666625977, 'timestamp': '2025-09-10 02:32:49.187227', 'step': 423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:49.240794', 'step': 423, 'epoch': 1} {'type': 'loss', 'content': 0.1640133112668991, 'timestamp': '2025-09-10 02:32:49.246553', 'step': 424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:49.299361', 'step': 424, 'epoch': 1} {'type': 'loss', 'content': 0.15525633096694946, 'timestamp': '2025-09-10 02:32:49.300937', 'step': 425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:49.354454', 'step': 425, 'epoch': 1} {'type': 'loss', 'content': 0.2099037617444992, 'timestamp': '2025-09-10 02:32:49.356005', 'step': 426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:49.408727', 'step': 426, 'epoch': 1} {'type': 'loss', 'content': 0.3323684632778168, 'timestamp': '2025-09-10 02:32:49.410365', 'step': 427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:49.463781', 'step': 427, 'epoch': 1} {'type': 'loss', 'content': 0.24385938048362732, 'timestamp': '2025-09-10 02:32:49.469525', 'step': 428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:49.523152', 'step': 428, 'epoch': 1} {'type': 'loss', 'content': 0.1757970005273819, 'timestamp': '2025-09-10 02:32:49.524842', 'step': 429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:49.579204', 'step': 429, 'epoch': 1} {'type': 'loss', 'content': 0.13142283260822296, 'timestamp': '2025-09-10 02:32:49.581907', 'step': 430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:32:49.638085', 'step': 430, 'epoch': 1} {'type': 'loss', 'content': 0.23489028215408325, 'timestamp': '2025-09-10 02:32:49.640321', 'step': 431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:49.695674', 'step': 431, 'epoch': 1} {'type': 'loss', 'content': 0.15885449945926666, 'timestamp': '2025-09-10 02:32:49.701700', 'step': 432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:49.755671', 'step': 432, 'epoch': 1} {'type': 'loss', 'content': 0.1514926701784134, 'timestamp': '2025-09-10 02:32:49.757544', 'step': 433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:49.811292', 'step': 433, 'epoch': 1} {'type': 'loss', 'content': 0.15576396882534027, 'timestamp': '2025-09-10 02:32:49.813013', 'step': 434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:49.866484', 'step': 434, 'epoch': 1} {'type': 'loss', 'content': 0.29898104071617126, 'timestamp': '2025-09-10 02:32:49.868301', 'step': 435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:49.921639', 'step': 435, 'epoch': 1} {'type': 'loss', 'content': 0.11256994307041168, 'timestamp': '2025-09-10 02:32:49.927655', 'step': 436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:49.980574', 'step': 436, 'epoch': 1} {'type': 'loss', 'content': 0.23385612666606903, 'timestamp': '2025-09-10 02:32:49.982642', 'step': 437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:50.036216', 'step': 437, 'epoch': 1} {'type': 'loss', 'content': 0.23173211514949799, 'timestamp': '2025-09-10 02:32:50.038229', 'step': 438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:50.091864', 'step': 438, 'epoch': 1} {'type': 'loss', 'content': 0.14846369624137878, 'timestamp': '2025-09-10 02:32:50.093707', 'step': 439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:50.147758', 'step': 439, 'epoch': 1} {'type': 'loss', 'content': 0.23555007576942444, 'timestamp': '2025-09-10 02:32:50.153799', 'step': 440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:50.207143', 'step': 440, 'epoch': 1} {'type': 'loss', 'content': 0.17180712521076202, 'timestamp': '2025-09-10 02:32:50.208721', 'step': 441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:50.261683', 'step': 441, 'epoch': 1} {'type': 'loss', 'content': 0.1622963845729828, 'timestamp': '2025-09-10 02:32:50.263459', 'step': 442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:50.316689', 'step': 442, 'epoch': 1} {'type': 'loss', 'content': 0.1807824820280075, 'timestamp': '2025-09-10 02:32:50.318373', 'step': 443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:50.372153', 'step': 443, 'epoch': 1} {'type': 'loss', 'content': 0.21760539710521698, 'timestamp': '2025-09-10 02:32:50.377669', 'step': 444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:50.430842', 'step': 444, 'epoch': 1} {'type': 'loss', 'content': 0.2651597857475281, 'timestamp': '2025-09-10 02:32:50.432660', 'step': 445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:50.487672', 'step': 445, 'epoch': 1} {'type': 'loss', 'content': 0.22573716938495636, 'timestamp': '2025-09-10 02:32:50.489666', 'step': 446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:32:50.545605', 'step': 446, 'epoch': 1} {'type': 'loss', 'content': 0.2447405904531479, 'timestamp': '2025-09-10 02:32:50.547743', 'step': 447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:32:50.602472', 'step': 447, 'epoch': 1} {'type': 'loss', 'content': 0.18694885075092316, 'timestamp': '2025-09-10 02:32:50.608457', 'step': 448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:50.662089', 'step': 448, 'epoch': 1} {'type': 'loss', 'content': 0.2602568566799164, 'timestamp': '2025-09-10 02:32:50.664194', 'step': 449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:50.719699', 'step': 449, 'epoch': 1} {'type': 'loss', 'content': 0.16974902153015137, 'timestamp': '2025-09-10 02:32:50.721627', 'step': 450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:50.776172', 'step': 450, 'epoch': 1} {'type': 'loss', 'content': 0.16736295819282532, 'timestamp': '2025-09-10 02:32:50.777975', 'step': 451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:50.833155', 'step': 451, 'epoch': 1} {'type': 'loss', 'content': 0.22581550478935242, 'timestamp': '2025-09-10 02:32:50.838893', 'step': 452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:50.892711', 'step': 452, 'epoch': 1} {'type': 'loss', 'content': 0.23530800640583038, 'timestamp': '2025-09-10 02:32:50.894379', 'step': 453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:32:50.949218', 'step': 453, 'epoch': 1} {'type': 'loss', 'content': 0.19812993705272675, 'timestamp': '2025-09-10 02:32:50.951055', 'step': 454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:51.005583', 'step': 454, 'epoch': 1} {'type': 'loss', 'content': 0.23222047090530396, 'timestamp': '2025-09-10 02:32:51.007504', 'step': 455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:51.063977', 'step': 455, 'epoch': 1} {'type': 'loss', 'content': 0.25441867113113403, 'timestamp': '2025-09-10 02:32:51.070016', 'step': 456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:32:51.124068', 'step': 456, 'epoch': 1} {'type': 'loss', 'content': 0.24820168316364288, 'timestamp': '2025-09-10 02:32:51.125895', 'step': 457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:51.180367', 'step': 457, 'epoch': 1} {'type': 'loss', 'content': 0.19088876247406006, 'timestamp': '2025-09-10 02:32:51.182242', 'step': 458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:51.237202', 'step': 458, 'epoch': 1} {'type': 'loss', 'content': 0.24598966538906097, 'timestamp': '2025-09-10 02:32:51.238971', 'step': 459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:51.293172', 'step': 459, 'epoch': 1} {'type': 'loss', 'content': 0.23837989568710327, 'timestamp': '2025-09-10 02:32:51.299110', 'step': 460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:32:51.353412', 'step': 460, 'epoch': 1} {'type': 'loss', 'content': 0.22886775434017181, 'timestamp': '2025-09-10 02:32:51.355200', 'step': 461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:51.410334', 'step': 461, 'epoch': 1} {'type': 'loss', 'content': 0.24548675119876862, 'timestamp': '2025-09-10 02:32:51.412106', 'step': 462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:32:51.467457', 'step': 462, 'epoch': 1} {'type': 'loss', 'content': 0.15245740115642548, 'timestamp': '2025-09-10 02:32:51.469620', 'step': 463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:51.524920', 'step': 463, 'epoch': 1} {'type': 'loss', 'content': 0.25466740131378174, 'timestamp': '2025-09-10 02:32:51.532773', 'step': 464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:32:51.588812', 'step': 464, 'epoch': 1} {'type': 'loss', 'content': 0.15772508084774017, 'timestamp': '2025-09-10 02:32:51.590661', 'step': 465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:32:51.644931', 'step': 465, 'epoch': 1} {'type': 'loss', 'content': 0.19768887758255005, 'timestamp': '2025-09-10 02:32:51.646842', 'step': 466, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:33:04.281838', 'step': 466, 'epoch': 1} {'type': 'pplx', 'content': 8164.013082573138, 'timestamp': '2025-09-10 02:33:04.284559', 'step': 466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:04.339635', 'step': 466, 'epoch': 1} {'type': 'loss', 'content': 0.23354069888591766, 'timestamp': '2025-09-10 02:33:04.341497', 'step': 467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:04.396824', 'step': 467, 'epoch': 1} {'type': 'loss', 'content': 0.21056629717350006, 'timestamp': '2025-09-10 02:33:04.402708', 'step': 468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:04.454907', 'step': 468, 'epoch': 1} {'type': 'loss', 'content': 0.21368445456027985, 'timestamp': '2025-09-10 02:33:04.456864', 'step': 469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:04.509831', 'step': 469, 'epoch': 1} {'type': 'loss', 'content': 0.10661043971776962, 'timestamp': '2025-09-10 02:33:04.511880', 'step': 470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:04.564760', 'step': 470, 'epoch': 1} {'type': 'loss', 'content': 0.23185119032859802, 'timestamp': '2025-09-10 02:33:04.566728', 'step': 471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:04.619911', 'step': 471, 'epoch': 1} {'type': 'loss', 'content': 0.24602626264095306, 'timestamp': '2025-09-10 02:33:04.625535', 'step': 472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:04.677603', 'step': 472, 'epoch': 1} {'type': 'loss', 'content': 0.1885097324848175, 'timestamp': '2025-09-10 02:33:04.679459', 'step': 473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:04.734107', 'step': 473, 'epoch': 1} {'type': 'loss', 'content': 0.10940184444189072, 'timestamp': '2025-09-10 02:33:04.736013', 'step': 474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:04.788330', 'step': 474, 'epoch': 1} {'type': 'loss', 'content': 0.2954287827014923, 'timestamp': '2025-09-10 02:33:04.790147', 'step': 475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:04.843554', 'step': 475, 'epoch': 1} {'type': 'loss', 'content': 0.15661638975143433, 'timestamp': '2025-09-10 02:33:04.849429', 'step': 476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:04.901436', 'step': 476, 'epoch': 1} {'type': 'loss', 'content': 0.15499569475650787, 'timestamp': '2025-09-10 02:33:04.903229', 'step': 477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:04.955770', 'step': 477, 'epoch': 1} {'type': 'loss', 'content': 0.12715384364128113, 'timestamp': '2025-09-10 02:33:04.957570', 'step': 478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:33:05.011679', 'step': 478, 'epoch': 1} {'type': 'loss', 'content': 0.15247046947479248, 'timestamp': '2025-09-10 02:33:05.013484', 'step': 479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:05.066808', 'step': 479, 'epoch': 1} {'type': 'loss', 'content': 0.2921454906463623, 'timestamp': '2025-09-10 02:33:05.072506', 'step': 480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:33:05.126225', 'step': 480, 'epoch': 1} {'type': 'loss', 'content': 0.2228945791721344, 'timestamp': '2025-09-10 02:33:05.128055', 'step': 481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:05.180872', 'step': 481, 'epoch': 1} {'type': 'loss', 'content': 0.19149255752563477, 'timestamp': '2025-09-10 02:33:05.182708', 'step': 482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:05.235503', 'step': 482, 'epoch': 1} {'type': 'loss', 'content': 0.16100256145000458, 'timestamp': '2025-09-10 02:33:05.237488', 'step': 483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:05.294087', 'step': 483, 'epoch': 1} {'type': 'loss', 'content': 0.2669464349746704, 'timestamp': '2025-09-10 02:33:05.299793', 'step': 484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:05.352728', 'step': 484, 'epoch': 1} {'type': 'loss', 'content': 0.18524521589279175, 'timestamp': '2025-09-10 02:33:05.354574', 'step': 485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:05.409162', 'step': 485, 'epoch': 1} {'type': 'loss', 'content': 0.1662960648536682, 'timestamp': '2025-09-10 02:33:05.415650', 'step': 486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:05.470513', 'step': 486, 'epoch': 1} {'type': 'loss', 'content': 0.11255188286304474, 'timestamp': '2025-09-10 02:33:05.472641', 'step': 487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:05.524915', 'step': 487, 'epoch': 1} {'type': 'loss', 'content': 0.2622673213481903, 'timestamp': '2025-09-10 02:33:05.530620', 'step': 488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:05.585291', 'step': 488, 'epoch': 1} {'type': 'loss', 'content': 0.27254605293273926, 'timestamp': '2025-09-10 02:33:05.588925', 'step': 489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:05.644238', 'step': 489, 'epoch': 1} {'type': 'loss', 'content': 0.16754281520843506, 'timestamp': '2025-09-10 02:33:05.647819', 'step': 490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:05.700858', 'step': 490, 'epoch': 1} {'type': 'loss', 'content': 0.2040957361459732, 'timestamp': '2025-09-10 02:33:05.702763', 'step': 491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:05.757483', 'step': 491, 'epoch': 1} {'type': 'loss', 'content': 0.2235639989376068, 'timestamp': '2025-09-10 02:33:05.763370', 'step': 492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:05.814840', 'step': 492, 'epoch': 1} {'type': 'loss', 'content': 0.12649403512477875, 'timestamp': '2025-09-10 02:33:05.816662', 'step': 493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:05.869143', 'step': 493, 'epoch': 1} {'type': 'loss', 'content': 0.23685438930988312, 'timestamp': '2025-09-10 02:33:05.871006', 'step': 494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:05.924062', 'step': 494, 'epoch': 1} {'type': 'loss', 'content': 0.2254285365343094, 'timestamp': '2025-09-10 02:33:05.925904', 'step': 495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:05.980969', 'step': 495, 'epoch': 1} {'type': 'loss', 'content': 0.23885104060173035, 'timestamp': '2025-09-10 02:33:05.986686', 'step': 496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:33:06.042635', 'step': 496, 'epoch': 1} {'type': 'loss', 'content': 0.2702237665653229, 'timestamp': '2025-09-10 02:33:06.045203', 'step': 497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:06.099140', 'step': 497, 'epoch': 1} {'type': 'loss', 'content': 0.2134568989276886, 'timestamp': '2025-09-10 02:33:06.101034', 'step': 498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:06.165693', 'step': 498, 'epoch': 1} {'type': 'loss', 'content': 0.23393553495407104, 'timestamp': '2025-09-10 02:33:06.167721', 'step': 499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:06.229720', 'step': 499, 'epoch': 1} {'type': 'loss', 'content': 0.17426344752311707, 'timestamp': '2025-09-10 02:33:06.235958', 'step': 500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 500', 'timestamp': '2025-09-10 02:33:06.624922', 'step': 500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:06.692999', 'step': 500, 'epoch': 1} {'type': 'loss', 'content': 0.10849744826555252, 'timestamp': '2025-09-10 02:33:06.694828', 'step': 501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:06.749582', 'step': 501, 'epoch': 1} {'type': 'loss', 'content': 0.2227509766817093, 'timestamp': '2025-09-10 02:33:06.751528', 'step': 502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:06.805070', 'step': 502, 'epoch': 1} {'type': 'loss', 'content': 0.15734462440013885, 'timestamp': '2025-09-10 02:33:06.806933', 'step': 503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:06.859817', 'step': 503, 'epoch': 1} {'type': 'loss', 'content': 0.15095554292201996, 'timestamp': '2025-09-10 02:33:06.865708', 'step': 504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:06.917512', 'step': 504, 'epoch': 1} {'type': 'loss', 'content': 0.1578710973262787, 'timestamp': '2025-09-10 02:33:06.922147', 'step': 505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:06.986101', 'step': 505, 'epoch': 1} {'type': 'loss', 'content': 0.2111903727054596, 'timestamp': '2025-09-10 02:33:06.989479', 'step': 506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:07.044294', 'step': 506, 'epoch': 1} {'type': 'loss', 'content': 0.27453163266181946, 'timestamp': '2025-09-10 02:33:07.046193', 'step': 507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:07.101391', 'step': 507, 'epoch': 1} {'type': 'loss', 'content': 0.13830073177814484, 'timestamp': '2025-09-10 02:33:07.107454', 'step': 508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:07.162825', 'step': 508, 'epoch': 1} {'type': 'loss', 'content': 0.18524643778800964, 'timestamp': '2025-09-10 02:33:07.164686', 'step': 509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:07.218600', 'step': 509, 'epoch': 1} {'type': 'loss', 'content': 0.14612455666065216, 'timestamp': '2025-09-10 02:33:07.220429', 'step': 510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:07.273787', 'step': 510, 'epoch': 1} {'type': 'loss', 'content': 0.13254575431346893, 'timestamp': '2025-09-10 02:33:07.275799', 'step': 511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:07.331081', 'step': 511, 'epoch': 1} {'type': 'loss', 'content': 0.14923901855945587, 'timestamp': '2025-09-10 02:33:07.336911', 'step': 512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:07.390072', 'step': 512, 'epoch': 1} {'type': 'loss', 'content': 0.2825201451778412, 'timestamp': '2025-09-10 02:33:07.392071', 'step': 513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:07.447328', 'step': 513, 'epoch': 1} {'type': 'loss', 'content': 0.1916590929031372, 'timestamp': '2025-09-10 02:33:07.449346', 'step': 514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:07.504311', 'step': 514, 'epoch': 1} {'type': 'loss', 'content': 0.21794196963310242, 'timestamp': '2025-09-10 02:33:07.506258', 'step': 515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:07.560915', 'step': 515, 'epoch': 1} {'type': 'loss', 'content': 0.21530817449092865, 'timestamp': '2025-09-10 02:33:07.566868', 'step': 516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:07.621916', 'step': 516, 'epoch': 1} {'type': 'loss', 'content': 0.23540955781936646, 'timestamp': '2025-09-10 02:33:07.623750', 'step': 517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:07.679420', 'step': 517, 'epoch': 1} {'type': 'loss', 'content': 0.12473855167627335, 'timestamp': '2025-09-10 02:33:07.681275', 'step': 518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:07.736641', 'step': 518, 'epoch': 1} {'type': 'loss', 'content': 0.2160056084394455, 'timestamp': '2025-09-10 02:33:07.738547', 'step': 519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:07.793823', 'step': 519, 'epoch': 1} {'type': 'loss', 'content': 0.17260536551475525, 'timestamp': '2025-09-10 02:33:07.800003', 'step': 520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:07.853085', 'step': 520, 'epoch': 1} {'type': 'loss', 'content': 0.15378838777542114, 'timestamp': '2025-09-10 02:33:07.855021', 'step': 521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:07.910257', 'step': 521, 'epoch': 1} {'type': 'loss', 'content': 0.29711610078811646, 'timestamp': '2025-09-10 02:33:07.912177', 'step': 522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:07.965750', 'step': 522, 'epoch': 1} {'type': 'loss', 'content': 0.20601238310337067, 'timestamp': '2025-09-10 02:33:07.967647', 'step': 523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:08.023099', 'step': 523, 'epoch': 1} {'type': 'loss', 'content': 0.19543251395225525, 'timestamp': '2025-09-10 02:33:08.029206', 'step': 524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:08.082964', 'step': 524, 'epoch': 1} {'type': 'loss', 'content': 0.11351727694272995, 'timestamp': '2025-09-10 02:33:08.085092', 'step': 525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:08.138617', 'step': 525, 'epoch': 1} {'type': 'loss', 'content': 0.2465730756521225, 'timestamp': '2025-09-10 02:33:08.140571', 'step': 526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:08.193541', 'step': 526, 'epoch': 1} {'type': 'loss', 'content': 0.18559446930885315, 'timestamp': '2025-09-10 02:33:08.195544', 'step': 527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:33:08.249140', 'step': 527, 'epoch': 1} {'type': 'loss', 'content': 0.17522358894348145, 'timestamp': '2025-09-10 02:33:08.255232', 'step': 528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:08.307513', 'step': 528, 'epoch': 1} {'type': 'loss', 'content': 0.1548980325460434, 'timestamp': '2025-09-10 02:33:08.309456', 'step': 529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:08.362127', 'step': 529, 'epoch': 1} {'type': 'loss', 'content': 0.20142890512943268, 'timestamp': '2025-09-10 02:33:08.363951', 'step': 530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:08.416180', 'step': 530, 'epoch': 1} {'type': 'loss', 'content': 0.15421868860721588, 'timestamp': '2025-09-10 02:33:08.418030', 'step': 531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:08.471448', 'step': 531, 'epoch': 1} {'type': 'loss', 'content': 0.16774988174438477, 'timestamp': '2025-09-10 02:33:08.477277', 'step': 532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:33:08.533823', 'step': 532, 'epoch': 1} {'type': 'loss', 'content': 0.1505819708108902, 'timestamp': '2025-09-10 02:33:08.535960', 'step': 533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:33:08.589995', 'step': 533, 'epoch': 1} {'type': 'loss', 'content': 0.2078768014907837, 'timestamp': '2025-09-10 02:33:08.592088', 'step': 534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:08.646080', 'step': 534, 'epoch': 1} {'type': 'loss', 'content': 0.385840505361557, 'timestamp': '2025-09-10 02:33:08.648000', 'step': 535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:08.701842', 'step': 535, 'epoch': 1} {'type': 'loss', 'content': 0.1849924623966217, 'timestamp': '2025-09-10 02:33:08.707774', 'step': 536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:08.760506', 'step': 536, 'epoch': 1} {'type': 'loss', 'content': 0.12713921070098877, 'timestamp': '2025-09-10 02:33:08.762545', 'step': 537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:08.815602', 'step': 537, 'epoch': 1} {'type': 'loss', 'content': 0.09930123388767242, 'timestamp': '2025-09-10 02:33:08.818820', 'step': 538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:08.873195', 'step': 538, 'epoch': 1} {'type': 'loss', 'content': 0.2423621267080307, 'timestamp': '2025-09-10 02:33:08.875224', 'step': 539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:08.928611', 'step': 539, 'epoch': 1} {'type': 'loss', 'content': 0.18934373557567596, 'timestamp': '2025-09-10 02:33:08.935606', 'step': 540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:08.990725', 'step': 540, 'epoch': 1} {'type': 'loss', 'content': 0.19799882173538208, 'timestamp': '2025-09-10 02:33:08.992578', 'step': 541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:09.045502', 'step': 541, 'epoch': 1} {'type': 'loss', 'content': 0.19045403599739075, 'timestamp': '2025-09-10 02:33:09.047441', 'step': 542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:09.101618', 'step': 542, 'epoch': 1} {'type': 'loss', 'content': 0.1333310306072235, 'timestamp': '2025-09-10 02:33:09.103646', 'step': 543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:09.157071', 'step': 543, 'epoch': 1} {'type': 'loss', 'content': 0.21987798810005188, 'timestamp': '2025-09-10 02:33:09.163203', 'step': 544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:09.216504', 'step': 544, 'epoch': 1} {'type': 'loss', 'content': 0.1848263293504715, 'timestamp': '2025-09-10 02:33:09.218544', 'step': 545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:09.272679', 'step': 545, 'epoch': 1} {'type': 'loss', 'content': 0.21824902296066284, 'timestamp': '2025-09-10 02:33:09.274727', 'step': 546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:09.328215', 'step': 546, 'epoch': 1} {'type': 'loss', 'content': 0.1942027360200882, 'timestamp': '2025-09-10 02:33:09.330388', 'step': 547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:09.383385', 'step': 547, 'epoch': 1} {'type': 'loss', 'content': 0.27528756856918335, 'timestamp': '2025-09-10 02:33:09.388946', 'step': 548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:09.441625', 'step': 548, 'epoch': 1} {'type': 'loss', 'content': 0.28911519050598145, 'timestamp': '2025-09-10 02:33:09.443911', 'step': 549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:09.497538', 'step': 549, 'epoch': 1} {'type': 'loss', 'content': 0.10573918372392654, 'timestamp': '2025-09-10 02:33:09.499701', 'step': 550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:09.553006', 'step': 550, 'epoch': 1} {'type': 'loss', 'content': 0.19228778779506683, 'timestamp': '2025-09-10 02:33:09.554708', 'step': 551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:09.607758', 'step': 551, 'epoch': 1} {'type': 'loss', 'content': 0.2347085177898407, 'timestamp': '2025-09-10 02:33:09.613948', 'step': 552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:09.670790', 'step': 552, 'epoch': 1} {'type': 'loss', 'content': 0.1781814694404602, 'timestamp': '2025-09-10 02:33:09.672879', 'step': 553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:09.728400', 'step': 553, 'epoch': 1} {'type': 'loss', 'content': 0.2245127260684967, 'timestamp': '2025-09-10 02:33:09.730535', 'step': 554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:09.785913', 'step': 554, 'epoch': 1} {'type': 'loss', 'content': 0.17687346041202545, 'timestamp': '2025-09-10 02:33:09.787843', 'step': 555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:09.842798', 'step': 555, 'epoch': 1} {'type': 'loss', 'content': 0.14505518972873688, 'timestamp': '2025-09-10 02:33:09.849109', 'step': 556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:09.902683', 'step': 556, 'epoch': 1} {'type': 'loss', 'content': 0.13596948981285095, 'timestamp': '2025-09-10 02:33:09.904794', 'step': 557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:09.958342', 'step': 557, 'epoch': 1} {'type': 'loss', 'content': 0.1092466488480568, 'timestamp': '2025-09-10 02:33:09.960420', 'step': 558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:10.014407', 'step': 558, 'epoch': 1} {'type': 'loss', 'content': 0.15649716556072235, 'timestamp': '2025-09-10 02:33:10.016472', 'step': 559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:10.071347', 'step': 559, 'epoch': 1} {'type': 'loss', 'content': 0.2643848955631256, 'timestamp': '2025-09-10 02:33:10.077330', 'step': 560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:10.131169', 'step': 560, 'epoch': 1} {'type': 'loss', 'content': 0.1697022020816803, 'timestamp': '2025-09-10 02:33:10.133227', 'step': 561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:10.187396', 'step': 561, 'epoch': 1} {'type': 'loss', 'content': 0.12236841022968292, 'timestamp': '2025-09-10 02:33:10.189404', 'step': 562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:10.244147', 'step': 562, 'epoch': 1} {'type': 'loss', 'content': 0.18591423332691193, 'timestamp': '2025-09-10 02:33:10.246109', 'step': 563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:10.300510', 'step': 563, 'epoch': 1} {'type': 'loss', 'content': 0.22435161471366882, 'timestamp': '2025-09-10 02:33:10.306431', 'step': 564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:33:10.359670', 'step': 564, 'epoch': 1} {'type': 'loss', 'content': 0.2917345464229584, 'timestamp': '2025-09-10 02:33:10.361621', 'step': 565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:10.414631', 'step': 565, 'epoch': 1} {'type': 'loss', 'content': 0.23735886812210083, 'timestamp': '2025-09-10 02:33:10.416574', 'step': 566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:10.470321', 'step': 566, 'epoch': 1} {'type': 'loss', 'content': 0.3171032965183258, 'timestamp': '2025-09-10 02:33:10.472315', 'step': 567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:10.525272', 'step': 567, 'epoch': 1} {'type': 'loss', 'content': 0.17357264459133148, 'timestamp': '2025-09-10 02:33:10.531320', 'step': 568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:10.585737', 'step': 568, 'epoch': 1} {'type': 'loss', 'content': 0.2460905909538269, 'timestamp': '2025-09-10 02:33:10.587713', 'step': 569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:10.641510', 'step': 569, 'epoch': 1} {'type': 'loss', 'content': 0.14440764486789703, 'timestamp': '2025-09-10 02:33:10.643578', 'step': 570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:10.697775', 'step': 570, 'epoch': 1} {'type': 'loss', 'content': 0.189398854970932, 'timestamp': '2025-09-10 02:33:10.699893', 'step': 571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:10.753733', 'step': 571, 'epoch': 1} {'type': 'loss', 'content': 0.18363672494888306, 'timestamp': '2025-09-10 02:33:10.760017', 'step': 572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:10.813261', 'step': 572, 'epoch': 1} {'type': 'loss', 'content': 0.14039269089698792, 'timestamp': '2025-09-10 02:33:10.815451', 'step': 573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:10.868905', 'step': 573, 'epoch': 1} {'type': 'loss', 'content': 0.31547126173973083, 'timestamp': '2025-09-10 02:33:10.870945', 'step': 574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:10.924673', 'step': 574, 'epoch': 1} {'type': 'loss', 'content': 0.13728418946266174, 'timestamp': '2025-09-10 02:33:10.926699', 'step': 575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:10.979659', 'step': 575, 'epoch': 1} {'type': 'loss', 'content': 0.19620603322982788, 'timestamp': '2025-09-10 02:33:10.985913', 'step': 576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:11.039023', 'step': 576, 'epoch': 1} {'type': 'loss', 'content': 0.10225114226341248, 'timestamp': '2025-09-10 02:33:11.041187', 'step': 577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:11.093732', 'step': 577, 'epoch': 1} {'type': 'loss', 'content': 0.2419523149728775, 'timestamp': '2025-09-10 02:33:11.095686', 'step': 578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:11.149036', 'step': 578, 'epoch': 1} {'type': 'loss', 'content': 0.2616659104824066, 'timestamp': '2025-09-10 02:33:11.151079', 'step': 579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:11.203751', 'step': 579, 'epoch': 1} {'type': 'loss', 'content': 0.17416727542877197, 'timestamp': '2025-09-10 02:33:11.209608', 'step': 580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:11.262612', 'step': 580, 'epoch': 1} {'type': 'loss', 'content': 0.14904402196407318, 'timestamp': '2025-09-10 02:33:11.264647', 'step': 581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:11.317200', 'step': 581, 'epoch': 1} {'type': 'loss', 'content': 0.1254083514213562, 'timestamp': '2025-09-10 02:33:11.319300', 'step': 582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:11.373151', 'step': 582, 'epoch': 1} {'type': 'loss', 'content': 0.20160898566246033, 'timestamp': '2025-09-10 02:33:11.375318', 'step': 583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:11.428051', 'step': 583, 'epoch': 1} {'type': 'loss', 'content': 0.20077913999557495, 'timestamp': '2025-09-10 02:33:11.433953', 'step': 584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:11.486440', 'step': 584, 'epoch': 1} {'type': 'loss', 'content': 0.14331497251987457, 'timestamp': '2025-09-10 02:33:11.488686', 'step': 585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:33:11.546359', 'step': 585, 'epoch': 1} {'type': 'loss', 'content': 0.12125294655561447, 'timestamp': '2025-09-10 02:33:11.548418', 'step': 586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:11.601937', 'step': 586, 'epoch': 1} {'type': 'loss', 'content': 0.15339680016040802, 'timestamp': '2025-09-10 02:33:11.603893', 'step': 587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:11.660690', 'step': 587, 'epoch': 1} {'type': 'loss', 'content': 0.18543963134288788, 'timestamp': '2025-09-10 02:33:11.666610', 'step': 588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:11.718741', 'step': 588, 'epoch': 1} {'type': 'loss', 'content': 0.24352459609508514, 'timestamp': '2025-09-10 02:33:11.720819', 'step': 589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:11.773816', 'step': 589, 'epoch': 1} {'type': 'loss', 'content': 0.17934344708919525, 'timestamp': '2025-09-10 02:33:11.775924', 'step': 590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:33:11.829754', 'step': 590, 'epoch': 1} {'type': 'loss', 'content': 0.2042299211025238, 'timestamp': '2025-09-10 02:33:11.831834', 'step': 591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:33:11.885345', 'step': 591, 'epoch': 1} {'type': 'loss', 'content': 0.18103642761707306, 'timestamp': '2025-09-10 02:33:11.891558', 'step': 592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:11.945979', 'step': 592, 'epoch': 1} {'type': 'loss', 'content': 0.2318769097328186, 'timestamp': '2025-09-10 02:33:11.948002', 'step': 593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:12.005453', 'step': 593, 'epoch': 1} {'type': 'loss', 'content': 0.14543436467647552, 'timestamp': '2025-09-10 02:33:12.007469', 'step': 594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:12.064815', 'step': 594, 'epoch': 1} {'type': 'loss', 'content': 0.15708154439926147, 'timestamp': '2025-09-10 02:33:12.066931', 'step': 595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:12.123781', 'step': 595, 'epoch': 1} {'type': 'loss', 'content': 0.361197829246521, 'timestamp': '2025-09-10 02:33:12.130270', 'step': 596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:12.185126', 'step': 596, 'epoch': 1} {'type': 'loss', 'content': 0.18870846927165985, 'timestamp': '2025-09-10 02:33:12.186955', 'step': 597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:12.242297', 'step': 597, 'epoch': 1} {'type': 'loss', 'content': 0.2202637791633606, 'timestamp': '2025-09-10 02:33:12.244295', 'step': 598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:12.301058', 'step': 598, 'epoch': 1} {'type': 'loss', 'content': 0.23992028832435608, 'timestamp': '2025-09-10 02:33:12.303064', 'step': 599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:33:12.360151', 'step': 599, 'epoch': 1} {'type': 'loss', 'content': 0.22888483107089996, 'timestamp': '2025-09-10 02:33:12.367185', 'step': 600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:12.423180', 'step': 600, 'epoch': 1} {'type': 'loss', 'content': 0.16379471123218536, 'timestamp': '2025-09-10 02:33:12.425186', 'step': 601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:12.479832', 'step': 601, 'epoch': 1} {'type': 'loss', 'content': 0.29586198925971985, 'timestamp': '2025-09-10 02:33:12.481683', 'step': 602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:12.535309', 'step': 602, 'epoch': 1} {'type': 'loss', 'content': 0.25271064043045044, 'timestamp': '2025-09-10 02:33:12.537304', 'step': 603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:12.590966', 'step': 603, 'epoch': 1} {'type': 'loss', 'content': 0.16628484427928925, 'timestamp': '2025-09-10 02:33:12.596790', 'step': 604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:12.649592', 'step': 604, 'epoch': 1} {'type': 'loss', 'content': 0.14812609553337097, 'timestamp': '2025-09-10 02:33:12.651657', 'step': 605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:12.705025', 'step': 605, 'epoch': 1} {'type': 'loss', 'content': 0.15019094944000244, 'timestamp': '2025-09-10 02:33:12.707010', 'step': 606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:12.760383', 'step': 606, 'epoch': 1} {'type': 'loss', 'content': 0.0822044312953949, 'timestamp': '2025-09-10 02:33:12.763399', 'step': 607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:12.817750', 'step': 607, 'epoch': 1} {'type': 'loss', 'content': 0.1633635312318802, 'timestamp': '2025-09-10 02:33:12.823570', 'step': 608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:12.876504', 'step': 608, 'epoch': 1} {'type': 'loss', 'content': 0.12175800651311874, 'timestamp': '2025-09-10 02:33:12.878573', 'step': 609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:12.931920', 'step': 609, 'epoch': 1} {'type': 'loss', 'content': 0.23339314758777618, 'timestamp': '2025-09-10 02:33:12.933937', 'step': 610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:12.986489', 'step': 610, 'epoch': 1} {'type': 'loss', 'content': 0.24937625229358673, 'timestamp': '2025-09-10 02:33:12.988487', 'step': 611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:13.041076', 'step': 611, 'epoch': 1} {'type': 'loss', 'content': 0.2234891951084137, 'timestamp': '2025-09-10 02:33:13.046941', 'step': 612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:13.099252', 'step': 612, 'epoch': 1} {'type': 'loss', 'content': 0.15775088965892792, 'timestamp': '2025-09-10 02:33:13.101061', 'step': 613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:13.153659', 'step': 613, 'epoch': 1} {'type': 'loss', 'content': 0.23237058520317078, 'timestamp': '2025-09-10 02:33:13.155658', 'step': 614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:13.208611', 'step': 614, 'epoch': 1} {'type': 'loss', 'content': 0.2308254837989807, 'timestamp': '2025-09-10 02:33:13.210840', 'step': 615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:13.264187', 'step': 615, 'epoch': 1} {'type': 'loss', 'content': 0.17295806109905243, 'timestamp': '2025-09-10 02:33:13.269921', 'step': 616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:13.323178', 'step': 616, 'epoch': 1} {'type': 'loss', 'content': 0.22742362320423126, 'timestamp': '2025-09-10 02:33:13.325326', 'step': 617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:13.385870', 'step': 617, 'epoch': 1} {'type': 'loss', 'content': 0.26104405522346497, 'timestamp': '2025-09-10 02:33:13.388111', 'step': 618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:13.448258', 'step': 618, 'epoch': 1} {'type': 'loss', 'content': 0.14952416718006134, 'timestamp': '2025-09-10 02:33:13.450509', 'step': 619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:13.510469', 'step': 619, 'epoch': 1} {'type': 'loss', 'content': 0.25497308373451233, 'timestamp': '2025-09-10 02:33:13.517583', 'step': 620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:13.576822', 'step': 620, 'epoch': 1} {'type': 'loss', 'content': 0.24783819913864136, 'timestamp': '2025-09-10 02:33:13.579048', 'step': 621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:13.639260', 'step': 621, 'epoch': 1} {'type': 'loss', 'content': 0.24496091902256012, 'timestamp': '2025-09-10 02:33:13.641215', 'step': 622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:13.698743', 'step': 622, 'epoch': 1} {'type': 'loss', 'content': 0.2557339668273926, 'timestamp': '2025-09-10 02:33:13.700811', 'step': 623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:13.755514', 'step': 623, 'epoch': 1} {'type': 'loss', 'content': 0.20934408903121948, 'timestamp': '2025-09-10 02:33:13.761443', 'step': 624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:13.815220', 'step': 624, 'epoch': 1} {'type': 'loss', 'content': 0.14076794683933258, 'timestamp': '2025-09-10 02:33:13.816923', 'step': 625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:13.870036', 'step': 625, 'epoch': 1} {'type': 'loss', 'content': 0.21036005020141602, 'timestamp': '2025-09-10 02:33:13.871883', 'step': 626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:13.926155', 'step': 626, 'epoch': 1} {'type': 'loss', 'content': 0.258213609457016, 'timestamp': '2025-09-10 02:33:13.928304', 'step': 627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:13.981619', 'step': 627, 'epoch': 1} {'type': 'loss', 'content': 0.16438819468021393, 'timestamp': '2025-09-10 02:33:13.987664', 'step': 628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:14.040422', 'step': 628, 'epoch': 1} {'type': 'loss', 'content': 0.2607092559337616, 'timestamp': '2025-09-10 02:33:14.042572', 'step': 629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:14.095709', 'step': 629, 'epoch': 1} {'type': 'loss', 'content': 0.15359055995941162, 'timestamp': '2025-09-10 02:33:14.097834', 'step': 630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:14.154703', 'step': 630, 'epoch': 1} {'type': 'loss', 'content': 0.15216737985610962, 'timestamp': '2025-09-10 02:33:14.156708', 'step': 631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:14.217799', 'step': 631, 'epoch': 1} {'type': 'loss', 'content': 0.19808420538902283, 'timestamp': '2025-09-10 02:33:14.223352', 'step': 632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:14.279798', 'step': 632, 'epoch': 1} {'type': 'loss', 'content': 0.13842850923538208, 'timestamp': '2025-09-10 02:33:14.281800', 'step': 633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:14.335211', 'step': 633, 'epoch': 1} {'type': 'loss', 'content': 0.23684313893318176, 'timestamp': '2025-09-10 02:33:14.337197', 'step': 634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:14.390523', 'step': 634, 'epoch': 1} {'type': 'loss', 'content': 0.14249370992183685, 'timestamp': '2025-09-10 02:33:14.392651', 'step': 635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:14.446153', 'step': 635, 'epoch': 1} {'type': 'loss', 'content': 0.17707586288452148, 'timestamp': '2025-09-10 02:33:14.452081', 'step': 636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:14.505001', 'step': 636, 'epoch': 1} {'type': 'loss', 'content': 0.18002457916736603, 'timestamp': '2025-09-10 02:33:14.507117', 'step': 637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:14.561154', 'step': 637, 'epoch': 1} {'type': 'loss', 'content': 0.21243886649608612, 'timestamp': '2025-09-10 02:33:14.563191', 'step': 638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:14.616656', 'step': 638, 'epoch': 1} {'type': 'loss', 'content': 0.19253398478031158, 'timestamp': '2025-09-10 02:33:14.618584', 'step': 639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:14.673613', 'step': 639, 'epoch': 1} {'type': 'loss', 'content': 0.15164250135421753, 'timestamp': '2025-09-10 02:33:14.679317', 'step': 640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:14.731858', 'step': 640, 'epoch': 1} {'type': 'loss', 'content': 0.2127457857131958, 'timestamp': '2025-09-10 02:33:14.733859', 'step': 641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:14.787192', 'step': 641, 'epoch': 1} {'type': 'loss', 'content': 0.1852610558271408, 'timestamp': '2025-09-10 02:33:14.789211', 'step': 642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:14.842273', 'step': 642, 'epoch': 1} {'type': 'loss', 'content': 0.21487927436828613, 'timestamp': '2025-09-10 02:33:14.844299', 'step': 643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:14.897641', 'step': 643, 'epoch': 1} {'type': 'loss', 'content': 0.19651709496974945, 'timestamp': '2025-09-10 02:33:14.903367', 'step': 644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:14.957099', 'step': 644, 'epoch': 1} {'type': 'loss', 'content': 0.2109585404396057, 'timestamp': '2025-09-10 02:33:14.959317', 'step': 645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:15.015816', 'step': 645, 'epoch': 1} {'type': 'loss', 'content': 0.16693159937858582, 'timestamp': '2025-09-10 02:33:15.017819', 'step': 646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:15.070862', 'step': 646, 'epoch': 1} {'type': 'loss', 'content': 0.210213765501976, 'timestamp': '2025-09-10 02:33:15.072978', 'step': 647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:15.125748', 'step': 647, 'epoch': 1} {'type': 'loss', 'content': 0.24686604738235474, 'timestamp': '2025-09-10 02:33:15.131481', 'step': 648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:15.186407', 'step': 648, 'epoch': 1} {'type': 'loss', 'content': 0.18995538353919983, 'timestamp': '2025-09-10 02:33:15.188478', 'step': 649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:15.241686', 'step': 649, 'epoch': 1} {'type': 'loss', 'content': 0.1841956079006195, 'timestamp': '2025-09-10 02:33:15.243690', 'step': 650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:33:15.297328', 'step': 650, 'epoch': 1} {'type': 'loss', 'content': 0.3557662069797516, 'timestamp': '2025-09-10 02:33:15.299971', 'step': 651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:15.353422', 'step': 651, 'epoch': 1} {'type': 'loss', 'content': 0.09056880325078964, 'timestamp': '2025-09-10 02:33:15.359025', 'step': 652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:15.411329', 'step': 652, 'epoch': 1} {'type': 'loss', 'content': 0.1934080719947815, 'timestamp': '2025-09-10 02:33:15.413332', 'step': 653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:15.467814', 'step': 653, 'epoch': 1} {'type': 'loss', 'content': 0.21823982894420624, 'timestamp': '2025-09-10 02:33:15.469824', 'step': 654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:15.522984', 'step': 654, 'epoch': 1} {'type': 'loss', 'content': 0.21793816983699799, 'timestamp': '2025-09-10 02:33:15.525155', 'step': 655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:15.578225', 'step': 655, 'epoch': 1} {'type': 'loss', 'content': 0.10022778064012527, 'timestamp': '2025-09-10 02:33:15.584000', 'step': 656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:15.636683', 'step': 656, 'epoch': 1} {'type': 'loss', 'content': 0.16020841896533966, 'timestamp': '2025-09-10 02:33:15.638727', 'step': 657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:15.692068', 'step': 657, 'epoch': 1} {'type': 'loss', 'content': 0.13839758932590485, 'timestamp': '2025-09-10 02:33:15.694191', 'step': 658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:15.748242', 'step': 658, 'epoch': 1} {'type': 'loss', 'content': 0.19857537746429443, 'timestamp': '2025-09-10 02:33:15.750558', 'step': 659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:15.803988', 'step': 659, 'epoch': 1} {'type': 'loss', 'content': 0.15675826370716095, 'timestamp': '2025-09-10 02:33:15.809566', 'step': 660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:15.862603', 'step': 660, 'epoch': 1} {'type': 'loss', 'content': 0.19458648562431335, 'timestamp': '2025-09-10 02:33:15.864624', 'step': 661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:15.918808', 'step': 661, 'epoch': 1} {'type': 'loss', 'content': 0.2390935868024826, 'timestamp': '2025-09-10 02:33:15.920752', 'step': 662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:15.974055', 'step': 662, 'epoch': 1} {'type': 'loss', 'content': 0.2775176763534546, 'timestamp': '2025-09-10 02:33:15.975706', 'step': 663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:16.028699', 'step': 663, 'epoch': 1} {'type': 'loss', 'content': 0.22479070723056793, 'timestamp': '2025-09-10 02:33:16.034484', 'step': 664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:16.088216', 'step': 664, 'epoch': 1} {'type': 'loss', 'content': 0.2370791882276535, 'timestamp': '2025-09-10 02:33:16.090068', 'step': 665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:16.142470', 'step': 665, 'epoch': 1} {'type': 'loss', 'content': 0.1367626190185547, 'timestamp': '2025-09-10 02:33:16.144406', 'step': 666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:16.197195', 'step': 666, 'epoch': 1} {'type': 'loss', 'content': 0.20921052992343903, 'timestamp': '2025-09-10 02:33:16.199184', 'step': 667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:16.253229', 'step': 667, 'epoch': 1} {'type': 'loss', 'content': 0.20303763449192047, 'timestamp': '2025-09-10 02:33:16.259091', 'step': 668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:16.311226', 'step': 668, 'epoch': 1} {'type': 'loss', 'content': 0.36051562428474426, 'timestamp': '2025-09-10 02:33:16.313237', 'step': 669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:33:16.365972', 'step': 669, 'epoch': 1} {'type': 'loss', 'content': 0.260984867811203, 'timestamp': '2025-09-10 02:33:16.367871', 'step': 670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:16.426820', 'step': 670, 'epoch': 1} {'type': 'loss', 'content': 0.17895841598510742, 'timestamp': '2025-09-10 02:33:16.433355', 'step': 671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:16.492761', 'step': 671, 'epoch': 1} {'type': 'loss', 'content': 0.1869015246629715, 'timestamp': '2025-09-10 02:33:16.498528', 'step': 672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:16.561013', 'step': 672, 'epoch': 1} {'type': 'loss', 'content': 0.1820167452096939, 'timestamp': '2025-09-10 02:33:16.563213', 'step': 673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:16.631197', 'step': 673, 'epoch': 1} {'type': 'loss', 'content': 0.18509721755981445, 'timestamp': '2025-09-10 02:33:16.633255', 'step': 674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:16.698827', 'step': 674, 'epoch': 1} {'type': 'loss', 'content': 0.17870591580867767, 'timestamp': '2025-09-10 02:33:16.700907', 'step': 675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:16.762941', 'step': 675, 'epoch': 1} {'type': 'loss', 'content': 0.16756178438663483, 'timestamp': '2025-09-10 02:33:16.768644', 'step': 676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:16.829099', 'step': 676, 'epoch': 1} {'type': 'loss', 'content': 0.15972010791301727, 'timestamp': '2025-09-10 02:33:16.831016', 'step': 677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:16.894452', 'step': 677, 'epoch': 1} {'type': 'loss', 'content': 0.31483474373817444, 'timestamp': '2025-09-10 02:33:16.896496', 'step': 678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:16.955674', 'step': 678, 'epoch': 1} {'type': 'loss', 'content': 0.1961902529001236, 'timestamp': '2025-09-10 02:33:16.957582', 'step': 679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:17.019991', 'step': 679, 'epoch': 1} {'type': 'loss', 'content': 0.2118304967880249, 'timestamp': '2025-09-10 02:33:17.033773', 'step': 680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:17.089368', 'step': 680, 'epoch': 1} {'type': 'loss', 'content': 0.18956559896469116, 'timestamp': '2025-09-10 02:33:17.091283', 'step': 681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:17.150634', 'step': 681, 'epoch': 1} {'type': 'loss', 'content': 0.19474013149738312, 'timestamp': '2025-09-10 02:33:17.152536', 'step': 682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:17.215073', 'step': 682, 'epoch': 1} {'type': 'loss', 'content': 0.1896161437034607, 'timestamp': '2025-09-10 02:33:17.217083', 'step': 683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:17.282466', 'step': 683, 'epoch': 1} {'type': 'loss', 'content': 0.1496560126543045, 'timestamp': '2025-09-10 02:33:17.288192', 'step': 684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:17.349418', 'step': 684, 'epoch': 1} {'type': 'loss', 'content': 0.20922383666038513, 'timestamp': '2025-09-10 02:33:17.351330', 'step': 685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:17.404506', 'step': 685, 'epoch': 1} {'type': 'loss', 'content': 0.21560946106910706, 'timestamp': '2025-09-10 02:33:17.406533', 'step': 686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:17.461451', 'step': 686, 'epoch': 1} {'type': 'loss', 'content': 0.19071008265018463, 'timestamp': '2025-09-10 02:33:17.463549', 'step': 687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:17.515860', 'step': 687, 'epoch': 1} {'type': 'loss', 'content': 0.12984541058540344, 'timestamp': '2025-09-10 02:33:17.521722', 'step': 688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:17.574009', 'step': 688, 'epoch': 1} {'type': 'loss', 'content': 0.20176395773887634, 'timestamp': '2025-09-10 02:33:17.575829', 'step': 689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:17.628556', 'step': 689, 'epoch': 1} {'type': 'loss', 'content': 0.2606754004955292, 'timestamp': '2025-09-10 02:33:17.630530', 'step': 690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:17.683837', 'step': 690, 'epoch': 1} {'type': 'loss', 'content': 0.18533749878406525, 'timestamp': '2025-09-10 02:33:17.685941', 'step': 691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:17.738984', 'step': 691, 'epoch': 1} {'type': 'loss', 'content': 0.19552473723888397, 'timestamp': '2025-09-10 02:33:17.744521', 'step': 692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:17.798375', 'step': 692, 'epoch': 1} {'type': 'loss', 'content': 0.19633474946022034, 'timestamp': '2025-09-10 02:33:17.800423', 'step': 693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:17.853055', 'step': 693, 'epoch': 1} {'type': 'loss', 'content': 0.27553144097328186, 'timestamp': '2025-09-10 02:33:17.855195', 'step': 694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:17.908609', 'step': 694, 'epoch': 1} {'type': 'loss', 'content': 0.2654532492160797, 'timestamp': '2025-09-10 02:33:17.910371', 'step': 695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:17.962697', 'step': 695, 'epoch': 1} {'type': 'loss', 'content': 0.15403331816196442, 'timestamp': '2025-09-10 02:33:17.968310', 'step': 696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:18.022630', 'step': 696, 'epoch': 1} {'type': 'loss', 'content': 0.22080612182617188, 'timestamp': '2025-09-10 02:33:18.025584', 'step': 697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:18.079356', 'step': 697, 'epoch': 1} {'type': 'loss', 'content': 0.194203719496727, 'timestamp': '2025-09-10 02:33:18.081322', 'step': 698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:18.134837', 'step': 698, 'epoch': 1} {'type': 'loss', 'content': 0.2443476915359497, 'timestamp': '2025-09-10 02:33:18.136985', 'step': 699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:18.190245', 'step': 699, 'epoch': 1} {'type': 'loss', 'content': 0.10467502474784851, 'timestamp': '2025-09-10 02:33:18.196212', 'step': 700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:18.249024', 'step': 700, 'epoch': 1} {'type': 'loss', 'content': 0.24091270565986633, 'timestamp': '2025-09-10 02:33:18.251092', 'step': 701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:18.305091', 'step': 701, 'epoch': 1} {'type': 'loss', 'content': 0.15673546493053436, 'timestamp': '2025-09-10 02:33:18.307292', 'step': 702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:18.361134', 'step': 702, 'epoch': 1} {'type': 'loss', 'content': 0.2196313887834549, 'timestamp': '2025-09-10 02:33:18.363322', 'step': 703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:18.416558', 'step': 703, 'epoch': 1} {'type': 'loss', 'content': 0.2012418657541275, 'timestamp': '2025-09-10 02:33:18.422281', 'step': 704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:18.474606', 'step': 704, 'epoch': 1} {'type': 'loss', 'content': 0.21682429313659668, 'timestamp': '2025-09-10 02:33:18.476702', 'step': 705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:18.529926', 'step': 705, 'epoch': 1} {'type': 'loss', 'content': 0.22443892061710358, 'timestamp': '2025-09-10 02:33:18.532012', 'step': 706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:18.585084', 'step': 706, 'epoch': 1} {'type': 'loss', 'content': 0.27904775738716125, 'timestamp': '2025-09-10 02:33:18.587121', 'step': 707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:18.639913', 'step': 707, 'epoch': 1} {'type': 'loss', 'content': 0.18628068268299103, 'timestamp': '2025-09-10 02:33:18.645426', 'step': 708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:18.697865', 'step': 708, 'epoch': 1} {'type': 'loss', 'content': 0.1524551957845688, 'timestamp': '2025-09-10 02:33:18.699874', 'step': 709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:18.753999', 'step': 709, 'epoch': 1} {'type': 'loss', 'content': 0.19381724298000336, 'timestamp': '2025-09-10 02:33:18.755935', 'step': 710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:18.811155', 'step': 710, 'epoch': 1} {'type': 'loss', 'content': 0.19871266186237335, 'timestamp': '2025-09-10 02:33:18.814140', 'step': 711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:18.872150', 'step': 711, 'epoch': 1} {'type': 'loss', 'content': 0.14684171974658966, 'timestamp': '2025-09-10 02:33:18.877917', 'step': 712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:18.930809', 'step': 712, 'epoch': 1} {'type': 'loss', 'content': 0.2345811277627945, 'timestamp': '2025-09-10 02:33:18.932789', 'step': 713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:18.985914', 'step': 713, 'epoch': 1} {'type': 'loss', 'content': 0.17385390400886536, 'timestamp': '2025-09-10 02:33:18.988013', 'step': 714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:19.040704', 'step': 714, 'epoch': 1} {'type': 'loss', 'content': 0.16821852326393127, 'timestamp': '2025-09-10 02:33:19.042490', 'step': 715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:19.094942', 'step': 715, 'epoch': 1} {'type': 'loss', 'content': 0.18405382335186005, 'timestamp': '2025-09-10 02:33:19.100585', 'step': 716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:19.153006', 'step': 716, 'epoch': 1} {'type': 'loss', 'content': 0.1415056586265564, 'timestamp': '2025-09-10 02:33:19.155156', 'step': 717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:19.208871', 'step': 717, 'epoch': 1} {'type': 'loss', 'content': 0.2267748862504959, 'timestamp': '2025-09-10 02:33:19.211108', 'step': 718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:19.265361', 'step': 718, 'epoch': 1} {'type': 'loss', 'content': 0.21942058205604553, 'timestamp': '2025-09-10 02:33:19.267463', 'step': 719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:19.321979', 'step': 719, 'epoch': 1} {'type': 'loss', 'content': 0.14826184511184692, 'timestamp': '2025-09-10 02:33:19.327834', 'step': 720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:19.380890', 'step': 720, 'epoch': 1} {'type': 'loss', 'content': 0.11985126882791519, 'timestamp': '2025-09-10 02:33:19.382969', 'step': 721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:19.436203', 'step': 721, 'epoch': 1} {'type': 'loss', 'content': 0.20868149399757385, 'timestamp': '2025-09-10 02:33:19.438310', 'step': 722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:19.491058', 'step': 722, 'epoch': 1} {'type': 'loss', 'content': 0.21850880980491638, 'timestamp': '2025-09-10 02:33:19.493097', 'step': 723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:19.547691', 'step': 723, 'epoch': 1} {'type': 'loss', 'content': 0.09515904635190964, 'timestamp': '2025-09-10 02:33:19.553510', 'step': 724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:19.607159', 'step': 724, 'epoch': 1} {'type': 'loss', 'content': 0.2257605344057083, 'timestamp': '2025-09-10 02:33:19.609252', 'step': 725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:19.661813', 'step': 725, 'epoch': 1} {'type': 'loss', 'content': 0.2430477738380432, 'timestamp': '2025-09-10 02:33:19.663918', 'step': 726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:19.717222', 'step': 726, 'epoch': 1} {'type': 'loss', 'content': 0.14785173535346985, 'timestamp': '2025-09-10 02:33:19.719292', 'step': 727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:19.772945', 'step': 727, 'epoch': 1} {'type': 'loss', 'content': 0.16508959233760834, 'timestamp': '2025-09-10 02:33:19.778818', 'step': 728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:19.833136', 'step': 728, 'epoch': 1} {'type': 'loss', 'content': 0.1800948828458786, 'timestamp': '2025-09-10 02:33:19.835043', 'step': 729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:19.890991', 'step': 729, 'epoch': 1} {'type': 'loss', 'content': 0.17764197289943695, 'timestamp': '2025-09-10 02:33:19.893109', 'step': 730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:19.946564', 'step': 730, 'epoch': 1} {'type': 'loss', 'content': 0.2306884527206421, 'timestamp': '2025-09-10 02:33:19.948865', 'step': 731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:20.001939', 'step': 731, 'epoch': 1} {'type': 'loss', 'content': 0.159721240401268, 'timestamp': '2025-09-10 02:33:20.008108', 'step': 732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:20.061540', 'step': 732, 'epoch': 1} {'type': 'loss', 'content': 0.19410386681556702, 'timestamp': '2025-09-10 02:33:20.063552', 'step': 733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:20.116315', 'step': 733, 'epoch': 1} {'type': 'loss', 'content': 0.23563401401042938, 'timestamp': '2025-09-10 02:33:20.118578', 'step': 734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:20.173868', 'step': 734, 'epoch': 1} {'type': 'loss', 'content': 0.2981669008731842, 'timestamp': '2025-09-10 02:33:20.175926', 'step': 735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:20.229653', 'step': 735, 'epoch': 1} {'type': 'loss', 'content': 0.19916431605815887, 'timestamp': '2025-09-10 02:33:20.235511', 'step': 736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:20.287977', 'step': 736, 'epoch': 1} {'type': 'loss', 'content': 0.17049530148506165, 'timestamp': '2025-09-10 02:33:20.289950', 'step': 737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:20.343164', 'step': 737, 'epoch': 1} {'type': 'loss', 'content': 0.14282232522964478, 'timestamp': '2025-09-10 02:33:20.345183', 'step': 738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:20.399020', 'step': 738, 'epoch': 1} {'type': 'loss', 'content': 0.14755010604858398, 'timestamp': '2025-09-10 02:33:20.401179', 'step': 739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:20.455109', 'step': 739, 'epoch': 1} {'type': 'loss', 'content': 0.11048761755228043, 'timestamp': '2025-09-10 02:33:20.461208', 'step': 740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:20.514215', 'step': 740, 'epoch': 1} {'type': 'loss', 'content': 0.17023611068725586, 'timestamp': '2025-09-10 02:33:20.516288', 'step': 741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:20.569421', 'step': 741, 'epoch': 1} {'type': 'loss', 'content': 0.1653577834367752, 'timestamp': '2025-09-10 02:33:20.571610', 'step': 742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:20.625681', 'step': 742, 'epoch': 1} {'type': 'loss', 'content': 0.21554400026798248, 'timestamp': '2025-09-10 02:33:20.628150', 'step': 743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:20.681686', 'step': 743, 'epoch': 1} {'type': 'loss', 'content': 0.19030210375785828, 'timestamp': '2025-09-10 02:33:20.687798', 'step': 744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:20.742576', 'step': 744, 'epoch': 1} {'type': 'loss', 'content': 0.1569850742816925, 'timestamp': '2025-09-10 02:33:20.744506', 'step': 745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:20.799275', 'step': 745, 'epoch': 1} {'type': 'loss', 'content': 0.216524139046669, 'timestamp': '2025-09-10 02:33:20.801479', 'step': 746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:20.854549', 'step': 746, 'epoch': 1} {'type': 'loss', 'content': 0.19383099675178528, 'timestamp': '2025-09-10 02:33:20.856543', 'step': 747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:20.910169', 'step': 747, 'epoch': 1} {'type': 'loss', 'content': 0.21635045111179352, 'timestamp': '2025-09-10 02:33:20.915746', 'step': 748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:33:20.969164', 'step': 748, 'epoch': 1} {'type': 'loss', 'content': 0.21068060398101807, 'timestamp': '2025-09-10 02:33:20.970941', 'step': 749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:21.024178', 'step': 749, 'epoch': 1} {'type': 'loss', 'content': 0.2822466194629669, 'timestamp': '2025-09-10 02:33:21.025985', 'step': 750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:21.079131', 'step': 750, 'epoch': 1} {'type': 'loss', 'content': 0.24040687084197998, 'timestamp': '2025-09-10 02:33:21.080967', 'step': 751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:21.134405', 'step': 751, 'epoch': 1} {'type': 'loss', 'content': 0.2658994793891907, 'timestamp': '2025-09-10 02:33:21.140423', 'step': 752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:21.192531', 'step': 752, 'epoch': 1} {'type': 'loss', 'content': 0.18911221623420715, 'timestamp': '2025-09-10 02:33:21.194570', 'step': 753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:21.247008', 'step': 753, 'epoch': 1} {'type': 'loss', 'content': 0.24622422456741333, 'timestamp': '2025-09-10 02:33:21.248857', 'step': 754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:21.301253', 'step': 754, 'epoch': 1} {'type': 'loss', 'content': 0.16773034632205963, 'timestamp': '2025-09-10 02:33:21.302983', 'step': 755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:21.355436', 'step': 755, 'epoch': 1} {'type': 'loss', 'content': 0.17128176987171173, 'timestamp': '2025-09-10 02:33:21.361092', 'step': 756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:21.414560', 'step': 756, 'epoch': 1} {'type': 'loss', 'content': 0.1696222424507141, 'timestamp': '2025-09-10 02:33:21.416430', 'step': 757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:21.471061', 'step': 757, 'epoch': 1} {'type': 'loss', 'content': 0.3096977472305298, 'timestamp': '2025-09-10 02:33:21.472979', 'step': 758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:21.526604', 'step': 758, 'epoch': 1} {'type': 'loss', 'content': 0.14458313584327698, 'timestamp': '2025-09-10 02:33:21.528476', 'step': 759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:21.581884', 'step': 759, 'epoch': 1} {'type': 'loss', 'content': 0.13041001558303833, 'timestamp': '2025-09-10 02:33:21.588304', 'step': 760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:21.640841', 'step': 760, 'epoch': 1} {'type': 'loss', 'content': 0.13639317452907562, 'timestamp': '2025-09-10 02:33:21.642899', 'step': 761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:21.695535', 'step': 761, 'epoch': 1} {'type': 'loss', 'content': 0.15031719207763672, 'timestamp': '2025-09-10 02:33:21.697330', 'step': 762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:21.753414', 'step': 762, 'epoch': 1} {'type': 'loss', 'content': 0.29389163851737976, 'timestamp': '2025-09-10 02:33:21.755368', 'step': 763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:21.808088', 'step': 763, 'epoch': 1} {'type': 'loss', 'content': 0.167455792427063, 'timestamp': '2025-09-10 02:33:21.813593', 'step': 764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:21.865502', 'step': 764, 'epoch': 1} {'type': 'loss', 'content': 0.2989780306816101, 'timestamp': '2025-09-10 02:33:21.867379', 'step': 765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:21.921709', 'step': 765, 'epoch': 1} {'type': 'loss', 'content': 0.25428253412246704, 'timestamp': '2025-09-10 02:33:21.923716', 'step': 766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:21.978110', 'step': 766, 'epoch': 1} {'type': 'loss', 'content': 0.13011202216148376, 'timestamp': '2025-09-10 02:33:21.979902', 'step': 767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:22.033071', 'step': 767, 'epoch': 1} {'type': 'loss', 'content': 0.19253548979759216, 'timestamp': '2025-09-10 02:33:22.038861', 'step': 768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:22.091316', 'step': 768, 'epoch': 1} {'type': 'loss', 'content': 0.23708349466323853, 'timestamp': '2025-09-10 02:33:22.093297', 'step': 769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:22.146520', 'step': 769, 'epoch': 1} {'type': 'loss', 'content': 0.19050133228302002, 'timestamp': '2025-09-10 02:33:22.148634', 'step': 770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:22.201099', 'step': 770, 'epoch': 1} {'type': 'loss', 'content': 0.15874500572681427, 'timestamp': '2025-09-10 02:33:22.202897', 'step': 771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:22.255087', 'step': 771, 'epoch': 1} {'type': 'loss', 'content': 0.08556024730205536, 'timestamp': '2025-09-10 02:33:22.260705', 'step': 772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:22.314825', 'step': 772, 'epoch': 1} {'type': 'loss', 'content': 0.1940237283706665, 'timestamp': '2025-09-10 02:33:22.316687', 'step': 773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:22.369632', 'step': 773, 'epoch': 1} {'type': 'loss', 'content': 0.23914563655853271, 'timestamp': '2025-09-10 02:33:22.371536', 'step': 774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:22.424677', 'step': 774, 'epoch': 1} {'type': 'loss', 'content': 0.11119534820318222, 'timestamp': '2025-09-10 02:33:22.426630', 'step': 775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:22.480125', 'step': 775, 'epoch': 1} {'type': 'loss', 'content': 0.24296651780605316, 'timestamp': '2025-09-10 02:33:22.486219', 'step': 776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:22.539867', 'step': 776, 'epoch': 1} {'type': 'loss', 'content': 0.22889330983161926, 'timestamp': '2025-09-10 02:33:22.541728', 'step': 777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:22.594534', 'step': 777, 'epoch': 1} {'type': 'loss', 'content': 0.09181340038776398, 'timestamp': '2025-09-10 02:33:22.596574', 'step': 778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:22.650378', 'step': 778, 'epoch': 1} {'type': 'loss', 'content': 0.26727038621902466, 'timestamp': '2025-09-10 02:33:22.652169', 'step': 779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:22.707199', 'step': 779, 'epoch': 1} {'type': 'loss', 'content': 0.2600695788860321, 'timestamp': '2025-09-10 02:33:22.712707', 'step': 780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:22.764615', 'step': 780, 'epoch': 1} {'type': 'loss', 'content': 0.13874097168445587, 'timestamp': '2025-09-10 02:33:22.766532', 'step': 781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:22.820152', 'step': 781, 'epoch': 1} {'type': 'loss', 'content': 0.16163717210292816, 'timestamp': '2025-09-10 02:33:22.822027', 'step': 782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:22.877541', 'step': 782, 'epoch': 1} {'type': 'loss', 'content': 0.22159305214881897, 'timestamp': '2025-09-10 02:33:22.879573', 'step': 783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:33:22.933355', 'step': 783, 'epoch': 1} {'type': 'loss', 'content': 0.1997787058353424, 'timestamp': '2025-09-10 02:33:22.939012', 'step': 784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:22.991239', 'step': 784, 'epoch': 1} {'type': 'loss', 'content': 0.21245059370994568, 'timestamp': '2025-09-10 02:33:22.993373', 'step': 785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:23.046226', 'step': 785, 'epoch': 1} {'type': 'loss', 'content': 0.312531441450119, 'timestamp': '2025-09-10 02:33:23.048149', 'step': 786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:23.100883', 'step': 786, 'epoch': 1} {'type': 'loss', 'content': 0.22872495651245117, 'timestamp': '2025-09-10 02:33:23.102909', 'step': 787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:23.155977', 'step': 787, 'epoch': 1} {'type': 'loss', 'content': 0.14240270853042603, 'timestamp': '2025-09-10 02:33:23.161905', 'step': 788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:23.214811', 'step': 788, 'epoch': 1} {'type': 'loss', 'content': 0.16836875677108765, 'timestamp': '2025-09-10 02:33:23.216777', 'step': 789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:23.269428', 'step': 789, 'epoch': 1} {'type': 'loss', 'content': 0.3088100850582123, 'timestamp': '2025-09-10 02:33:23.271544', 'step': 790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:23.327513', 'step': 790, 'epoch': 1} {'type': 'loss', 'content': 0.21130429208278656, 'timestamp': '2025-09-10 02:33:23.329356', 'step': 791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:23.384243', 'step': 791, 'epoch': 1} {'type': 'loss', 'content': 0.18207046389579773, 'timestamp': '2025-09-10 02:33:23.389800', 'step': 792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:23.446671', 'step': 792, 'epoch': 1} {'type': 'loss', 'content': 0.1597559005022049, 'timestamp': '2025-09-10 02:33:23.448493', 'step': 793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:23.503377', 'step': 793, 'epoch': 1} {'type': 'loss', 'content': 0.2840641140937805, 'timestamp': '2025-09-10 02:33:23.505191', 'step': 794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:23.561379', 'step': 794, 'epoch': 1} {'type': 'loss', 'content': 0.16602149605751038, 'timestamp': '2025-09-10 02:33:23.563517', 'step': 795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:23.619209', 'step': 795, 'epoch': 1} {'type': 'loss', 'content': 0.14910154044628143, 'timestamp': '2025-09-10 02:33:23.624808', 'step': 796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:23.677022', 'step': 796, 'epoch': 1} {'type': 'loss', 'content': 0.12863442301750183, 'timestamp': '2025-09-10 02:33:23.679122', 'step': 797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:23.731874', 'step': 797, 'epoch': 1} {'type': 'loss', 'content': 0.1362622231245041, 'timestamp': '2025-09-10 02:33:23.733828', 'step': 798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:23.786622', 'step': 798, 'epoch': 1} {'type': 'loss', 'content': 0.24940820038318634, 'timestamp': '2025-09-10 02:33:23.788555', 'step': 799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:23.841836', 'step': 799, 'epoch': 1} {'type': 'loss', 'content': 0.28906455636024475, 'timestamp': '2025-09-10 02:33:23.847517', 'step': 800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:23.901388', 'step': 800, 'epoch': 1} {'type': 'loss', 'content': 0.19252443313598633, 'timestamp': '2025-09-10 02:33:23.903474', 'step': 801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:23.956224', 'step': 801, 'epoch': 1} {'type': 'loss', 'content': 0.14932642877101898, 'timestamp': '2025-09-10 02:33:23.958190', 'step': 802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:24.010931', 'step': 802, 'epoch': 1} {'type': 'loss', 'content': 0.23717327415943146, 'timestamp': '2025-09-10 02:33:24.012808', 'step': 803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:24.065644', 'step': 803, 'epoch': 1} {'type': 'loss', 'content': 0.1703455001115799, 'timestamp': '2025-09-10 02:33:24.071538', 'step': 804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:24.124801', 'step': 804, 'epoch': 1} {'type': 'loss', 'content': 0.11827253550291061, 'timestamp': '2025-09-10 02:33:24.126742', 'step': 805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:24.180510', 'step': 805, 'epoch': 1} {'type': 'loss', 'content': 0.21080954372882843, 'timestamp': '2025-09-10 02:33:24.182498', 'step': 806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:24.235387', 'step': 806, 'epoch': 1} {'type': 'loss', 'content': 0.2846800982952118, 'timestamp': '2025-09-10 02:33:24.237403', 'step': 807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:24.290863', 'step': 807, 'epoch': 1} {'type': 'loss', 'content': 0.1854870468378067, 'timestamp': '2025-09-10 02:33:24.296413', 'step': 808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:24.348261', 'step': 808, 'epoch': 1} {'type': 'loss', 'content': 0.21702565252780914, 'timestamp': '2025-09-10 02:33:24.350240', 'step': 809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:24.402699', 'step': 809, 'epoch': 1} {'type': 'loss', 'content': 0.1883232295513153, 'timestamp': '2025-09-10 02:33:24.404749', 'step': 810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:24.457192', 'step': 810, 'epoch': 1} {'type': 'loss', 'content': 0.2430826723575592, 'timestamp': '2025-09-10 02:33:24.459001', 'step': 811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:24.511696', 'step': 811, 'epoch': 1} {'type': 'loss', 'content': 0.19458141922950745, 'timestamp': '2025-09-10 02:33:24.517153', 'step': 812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:24.570191', 'step': 812, 'epoch': 1} {'type': 'loss', 'content': 0.16426563262939453, 'timestamp': '2025-09-10 02:33:24.572031', 'step': 813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:24.625487', 'step': 813, 'epoch': 1} {'type': 'loss', 'content': 0.2000683695077896, 'timestamp': '2025-09-10 02:33:24.627473', 'step': 814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:33:24.680997', 'step': 814, 'epoch': 1} {'type': 'loss', 'content': 0.1962025910615921, 'timestamp': '2025-09-10 02:33:24.682961', 'step': 815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:24.736145', 'step': 815, 'epoch': 1} {'type': 'loss', 'content': 0.19573090970516205, 'timestamp': '2025-09-10 02:33:24.741940', 'step': 816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:24.794125', 'step': 816, 'epoch': 1} {'type': 'loss', 'content': 0.20633773505687714, 'timestamp': '2025-09-10 02:33:24.796078', 'step': 817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:24.849992', 'step': 817, 'epoch': 1} {'type': 'loss', 'content': 0.17999255657196045, 'timestamp': '2025-09-10 02:33:24.852009', 'step': 818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:24.917470', 'step': 818, 'epoch': 1} {'type': 'loss', 'content': 0.1819702833890915, 'timestamp': '2025-09-10 02:33:24.919526', 'step': 819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:24.990261', 'step': 819, 'epoch': 1} {'type': 'loss', 'content': 0.1705261617898941, 'timestamp': '2025-09-10 02:33:24.996240', 'step': 820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:25.049880', 'step': 820, 'epoch': 1} {'type': 'loss', 'content': 0.26964858174324036, 'timestamp': '2025-09-10 02:33:25.051737', 'step': 821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:25.105105', 'step': 821, 'epoch': 1} {'type': 'loss', 'content': 0.18778209388256073, 'timestamp': '2025-09-10 02:33:25.106992', 'step': 822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:25.160615', 'step': 822, 'epoch': 1} {'type': 'loss', 'content': 0.15884830057621002, 'timestamp': '2025-09-10 02:33:25.162587', 'step': 823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:25.216258', 'step': 823, 'epoch': 1} {'type': 'loss', 'content': 0.14683212339878082, 'timestamp': '2025-09-10 02:33:25.221970', 'step': 824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:25.276249', 'step': 824, 'epoch': 1} {'type': 'loss', 'content': 0.254932165145874, 'timestamp': '2025-09-10 02:33:25.278062', 'step': 825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:25.332022', 'step': 825, 'epoch': 1} {'type': 'loss', 'content': 0.13095396757125854, 'timestamp': '2025-09-10 02:33:25.333853', 'step': 826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:25.389193', 'step': 826, 'epoch': 1} {'type': 'loss', 'content': 0.3122043311595917, 'timestamp': '2025-09-10 02:33:25.391074', 'step': 827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:25.444620', 'step': 827, 'epoch': 1} {'type': 'loss', 'content': 0.23181411623954773, 'timestamp': '2025-09-10 02:33:25.450631', 'step': 828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:25.502761', 'step': 828, 'epoch': 1} {'type': 'loss', 'content': 0.12799029052257538, 'timestamp': '2025-09-10 02:33:25.504646', 'step': 829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:25.557529', 'step': 829, 'epoch': 1} {'type': 'loss', 'content': 0.14913295209407806, 'timestamp': '2025-09-10 02:33:25.559359', 'step': 830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:25.612531', 'step': 830, 'epoch': 1} {'type': 'loss', 'content': 0.3489987254142761, 'timestamp': '2025-09-10 02:33:25.614289', 'step': 831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:25.666624', 'step': 831, 'epoch': 1} {'type': 'loss', 'content': 0.18072186410427094, 'timestamp': '2025-09-10 02:33:25.672369', 'step': 832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:25.724500', 'step': 832, 'epoch': 1} {'type': 'loss', 'content': 0.1454937607049942, 'timestamp': '2025-09-10 02:33:25.726567', 'step': 833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:25.780205', 'step': 833, 'epoch': 1} {'type': 'loss', 'content': 0.15107589960098267, 'timestamp': '2025-09-10 02:33:25.782224', 'step': 834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:25.835091', 'step': 834, 'epoch': 1} {'type': 'loss', 'content': 0.14038388431072235, 'timestamp': '2025-09-10 02:33:25.836939', 'step': 835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:25.890452', 'step': 835, 'epoch': 1} {'type': 'loss', 'content': 0.16467474400997162, 'timestamp': '2025-09-10 02:33:25.896063', 'step': 836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:25.948184', 'step': 836, 'epoch': 1} {'type': 'loss', 'content': 0.1916218250989914, 'timestamp': '2025-09-10 02:33:25.950014', 'step': 837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:26.005196', 'step': 837, 'epoch': 1} {'type': 'loss', 'content': 0.21133264899253845, 'timestamp': '2025-09-10 02:33:26.007004', 'step': 838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:26.059743', 'step': 838, 'epoch': 1} {'type': 'loss', 'content': 0.31645312905311584, 'timestamp': '2025-09-10 02:33:26.061559', 'step': 839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:26.113983', 'step': 839, 'epoch': 1} {'type': 'loss', 'content': 0.20644015073776245, 'timestamp': '2025-09-10 02:33:26.119563', 'step': 840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:26.171295', 'step': 840, 'epoch': 1} {'type': 'loss', 'content': 0.17765925824642181, 'timestamp': '2025-09-10 02:33:26.173330', 'step': 841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:26.226070', 'step': 841, 'epoch': 1} {'type': 'loss', 'content': 0.13471192121505737, 'timestamp': '2025-09-10 02:33:26.227882', 'step': 842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:26.281068', 'step': 842, 'epoch': 1} {'type': 'loss', 'content': 0.2273111790418625, 'timestamp': '2025-09-10 02:33:26.283200', 'step': 843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:26.335822', 'step': 843, 'epoch': 1} {'type': 'loss', 'content': 0.25607752799987793, 'timestamp': '2025-09-10 02:33:26.341245', 'step': 844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:26.393933', 'step': 844, 'epoch': 1} {'type': 'loss', 'content': 0.19410033524036407, 'timestamp': '2025-09-10 02:33:26.395898', 'step': 845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:26.448701', 'step': 845, 'epoch': 1} {'type': 'loss', 'content': 0.22756551206111908, 'timestamp': '2025-09-10 02:33:26.450422', 'step': 846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:26.503412', 'step': 846, 'epoch': 1} {'type': 'loss', 'content': 0.1825483739376068, 'timestamp': '2025-09-10 02:33:26.505614', 'step': 847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:33:26.558952', 'step': 847, 'epoch': 1} {'type': 'loss', 'content': 0.2391844093799591, 'timestamp': '2025-09-10 02:33:26.564590', 'step': 848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:26.617331', 'step': 848, 'epoch': 1} {'type': 'loss', 'content': 0.1508803367614746, 'timestamp': '2025-09-10 02:33:26.619315', 'step': 849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:26.671707', 'step': 849, 'epoch': 1} {'type': 'loss', 'content': 0.16566835343837738, 'timestamp': '2025-09-10 02:33:26.673618', 'step': 850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:26.726532', 'step': 850, 'epoch': 1} {'type': 'loss', 'content': 0.24400869011878967, 'timestamp': '2025-09-10 02:33:26.728430', 'step': 851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:26.781234', 'step': 851, 'epoch': 1} {'type': 'loss', 'content': 0.1602851301431656, 'timestamp': '2025-09-10 02:33:26.786923', 'step': 852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:26.840848', 'step': 852, 'epoch': 1} {'type': 'loss', 'content': 0.18192535638809204, 'timestamp': '2025-09-10 02:33:26.842667', 'step': 853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:26.898417', 'step': 853, 'epoch': 1} {'type': 'loss', 'content': 0.20893746614456177, 'timestamp': '2025-09-10 02:33:26.900467', 'step': 854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:26.957549', 'step': 854, 'epoch': 1} {'type': 'loss', 'content': 0.16696731746196747, 'timestamp': '2025-09-10 02:33:26.959603', 'step': 855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:27.015446', 'step': 855, 'epoch': 1} {'type': 'loss', 'content': 0.1649344116449356, 'timestamp': '2025-09-10 02:33:27.021595', 'step': 856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:27.075630', 'step': 856, 'epoch': 1} {'type': 'loss', 'content': 0.3092360496520996, 'timestamp': '2025-09-10 02:33:27.077748', 'step': 857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:27.132360', 'step': 857, 'epoch': 1} {'type': 'loss', 'content': 0.21187594532966614, 'timestamp': '2025-09-10 02:33:27.134350', 'step': 858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:27.187802', 'step': 858, 'epoch': 1} {'type': 'loss', 'content': 0.1328544318675995, 'timestamp': '2025-09-10 02:33:27.189944', 'step': 859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:27.243018', 'step': 859, 'epoch': 1} {'type': 'loss', 'content': 0.1761985719203949, 'timestamp': '2025-09-10 02:33:27.248788', 'step': 860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:27.300889', 'step': 860, 'epoch': 1} {'type': 'loss', 'content': 0.17282907664775848, 'timestamp': '2025-09-10 02:33:27.302781', 'step': 861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:27.355892', 'step': 861, 'epoch': 1} {'type': 'loss', 'content': 0.0815306156873703, 'timestamp': '2025-09-10 02:33:27.357831', 'step': 862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:27.411222', 'step': 862, 'epoch': 1} {'type': 'loss', 'content': 0.28033173084259033, 'timestamp': '2025-09-10 02:33:27.413325', 'step': 863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:27.466845', 'step': 863, 'epoch': 1} {'type': 'loss', 'content': 0.15185412764549255, 'timestamp': '2025-09-10 02:33:27.477022', 'step': 864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:27.532076', 'step': 864, 'epoch': 1} {'type': 'loss', 'content': 0.19585198163986206, 'timestamp': '2025-09-10 02:33:27.534113', 'step': 865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:27.598019', 'step': 865, 'epoch': 1} {'type': 'loss', 'content': 0.2089485079050064, 'timestamp': '2025-09-10 02:33:27.600131', 'step': 866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:27.654546', 'step': 866, 'epoch': 1} {'type': 'loss', 'content': 0.19411267340183258, 'timestamp': '2025-09-10 02:33:27.661004', 'step': 867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:27.721599', 'step': 867, 'epoch': 1} {'type': 'loss', 'content': 0.24333718419075012, 'timestamp': '2025-09-10 02:33:27.731616', 'step': 868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:27.787099', 'step': 868, 'epoch': 1} {'type': 'loss', 'content': 0.14669902622699738, 'timestamp': '2025-09-10 02:33:27.789023', 'step': 869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:27.844264', 'step': 869, 'epoch': 1} {'type': 'loss', 'content': 0.21946628391742706, 'timestamp': '2025-09-10 02:33:27.846124', 'step': 870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:27.907495', 'step': 870, 'epoch': 1} {'type': 'loss', 'content': 0.10997986793518066, 'timestamp': '2025-09-10 02:33:27.909499', 'step': 871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:27.971383', 'step': 871, 'epoch': 1} {'type': 'loss', 'content': 0.16716083884239197, 'timestamp': '2025-09-10 02:33:27.977460', 'step': 872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:28.040837', 'step': 872, 'epoch': 1} {'type': 'loss', 'content': 0.2145390510559082, 'timestamp': '2025-09-10 02:33:28.042969', 'step': 873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:28.099435', 'step': 873, 'epoch': 1} {'type': 'loss', 'content': 0.27648845314979553, 'timestamp': '2025-09-10 02:33:28.101456', 'step': 874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:28.155322', 'step': 874, 'epoch': 1} {'type': 'loss', 'content': 0.21361617743968964, 'timestamp': '2025-09-10 02:33:28.157515', 'step': 875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:28.215050', 'step': 875, 'epoch': 1} {'type': 'loss', 'content': 0.16814646124839783, 'timestamp': '2025-09-10 02:33:28.220940', 'step': 876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:28.278630', 'step': 876, 'epoch': 1} {'type': 'loss', 'content': 0.17705221474170685, 'timestamp': '2025-09-10 02:33:28.280816', 'step': 877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:28.341339', 'step': 877, 'epoch': 1} {'type': 'loss', 'content': 0.1491282731294632, 'timestamp': '2025-09-10 02:33:28.343499', 'step': 878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:28.397352', 'step': 878, 'epoch': 1} {'type': 'loss', 'content': 0.18711483478546143, 'timestamp': '2025-09-10 02:33:28.399214', 'step': 879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:28.453963', 'step': 879, 'epoch': 1} {'type': 'loss', 'content': 0.15773600339889526, 'timestamp': '2025-09-10 02:33:28.459847', 'step': 880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:28.526013', 'step': 880, 'epoch': 1} {'type': 'loss', 'content': 0.17570923268795013, 'timestamp': '2025-09-10 02:33:28.528041', 'step': 881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:28.581864', 'step': 881, 'epoch': 1} {'type': 'loss', 'content': 0.25591906905174255, 'timestamp': '2025-09-10 02:33:28.584349', 'step': 882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:28.638688', 'step': 882, 'epoch': 1} {'type': 'loss', 'content': 0.1760503351688385, 'timestamp': '2025-09-10 02:33:28.640917', 'step': 883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:28.697101', 'step': 883, 'epoch': 1} {'type': 'loss', 'content': 0.2180570513010025, 'timestamp': '2025-09-10 02:33:28.702787', 'step': 884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:28.756015', 'step': 884, 'epoch': 1} {'type': 'loss', 'content': 0.14197786152362823, 'timestamp': '2025-09-10 02:33:28.759672', 'step': 885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:28.813439', 'step': 885, 'epoch': 1} {'type': 'loss', 'content': 0.3427557945251465, 'timestamp': '2025-09-10 02:33:28.815267', 'step': 886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:33:28.868387', 'step': 886, 'epoch': 1} {'type': 'loss', 'content': 0.24389663338661194, 'timestamp': '2025-09-10 02:33:28.876377', 'step': 887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:28.930163', 'step': 887, 'epoch': 1} {'type': 'loss', 'content': 0.28050023317337036, 'timestamp': '2025-09-10 02:33:28.936032', 'step': 888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:28.997670', 'step': 888, 'epoch': 1} {'type': 'loss', 'content': 0.2450983077287674, 'timestamp': '2025-09-10 02:33:28.999453', 'step': 889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:29.064148', 'step': 889, 'epoch': 1} {'type': 'loss', 'content': 0.11167634278535843, 'timestamp': '2025-09-10 02:33:29.066091', 'step': 890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:29.124946', 'step': 890, 'epoch': 1} {'type': 'loss', 'content': 0.2255728393793106, 'timestamp': '2025-09-10 02:33:29.127002', 'step': 891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:29.181083', 'step': 891, 'epoch': 1} {'type': 'loss', 'content': 0.16230803728103638, 'timestamp': '2025-09-10 02:33:29.186836', 'step': 892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:29.239587', 'step': 892, 'epoch': 1} {'type': 'loss', 'content': 0.13003192842006683, 'timestamp': '2025-09-10 02:33:29.241627', 'step': 893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:29.293645', 'step': 893, 'epoch': 1} {'type': 'loss', 'content': 0.2037837654352188, 'timestamp': '2025-09-10 02:33:29.295502', 'step': 894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:29.348922', 'step': 894, 'epoch': 1} {'type': 'loss', 'content': 0.12267516553401947, 'timestamp': '2025-09-10 02:33:29.350846', 'step': 895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:29.403959', 'step': 895, 'epoch': 1} {'type': 'loss', 'content': 0.21746112406253815, 'timestamp': '2025-09-10 02:33:29.409483', 'step': 896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:29.462017', 'step': 896, 'epoch': 1} {'type': 'loss', 'content': 0.16194845736026764, 'timestamp': '2025-09-10 02:33:29.463873', 'step': 897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:29.516703', 'step': 897, 'epoch': 1} {'type': 'loss', 'content': 0.17028716206550598, 'timestamp': '2025-09-10 02:33:29.518664', 'step': 898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:29.572031', 'step': 898, 'epoch': 1} {'type': 'loss', 'content': 0.2297671139240265, 'timestamp': '2025-09-10 02:33:29.573830', 'step': 899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:29.627644', 'step': 899, 'epoch': 1} {'type': 'loss', 'content': 0.3500347435474396, 'timestamp': '2025-09-10 02:33:29.633570', 'step': 900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:29.688781', 'step': 900, 'epoch': 1} {'type': 'loss', 'content': 0.14189468324184418, 'timestamp': '2025-09-10 02:33:29.690678', 'step': 901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:29.744780', 'step': 901, 'epoch': 1} {'type': 'loss', 'content': 0.15646331012248993, 'timestamp': '2025-09-10 02:33:29.746927', 'step': 902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:29.801155', 'step': 902, 'epoch': 1} {'type': 'loss', 'content': 0.1846679002046585, 'timestamp': '2025-09-10 02:33:29.803027', 'step': 903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:29.856564', 'step': 903, 'epoch': 1} {'type': 'loss', 'content': 0.22309619188308716, 'timestamp': '2025-09-10 02:33:29.862460', 'step': 904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:33:29.915327', 'step': 904, 'epoch': 1} {'type': 'loss', 'content': 0.17539113759994507, 'timestamp': '2025-09-10 02:33:29.917322', 'step': 905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:29.970499', 'step': 905, 'epoch': 1} {'type': 'loss', 'content': 0.22119556367397308, 'timestamp': '2025-09-10 02:33:29.972430', 'step': 906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:30.025454', 'step': 906, 'epoch': 1} {'type': 'loss', 'content': 0.1473677009344101, 'timestamp': '2025-09-10 02:33:30.027431', 'step': 907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:30.079951', 'step': 907, 'epoch': 1} {'type': 'loss', 'content': 0.20106710493564606, 'timestamp': '2025-09-10 02:33:30.085531', 'step': 908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:33:30.138664', 'step': 908, 'epoch': 1} {'type': 'loss', 'content': 0.27220332622528076, 'timestamp': '2025-09-10 02:33:30.140649', 'step': 909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:30.193957', 'step': 909, 'epoch': 1} {'type': 'loss', 'content': 0.18164025247097015, 'timestamp': '2025-09-10 02:33:30.195843', 'step': 910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:30.249268', 'step': 910, 'epoch': 1} {'type': 'loss', 'content': 0.22074255347251892, 'timestamp': '2025-09-10 02:33:30.250997', 'step': 911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:30.305058', 'step': 911, 'epoch': 1} {'type': 'loss', 'content': 0.2125033140182495, 'timestamp': '2025-09-10 02:33:30.311161', 'step': 912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:30.363405', 'step': 912, 'epoch': 1} {'type': 'loss', 'content': 0.14678001403808594, 'timestamp': '2025-09-10 02:33:30.365205', 'step': 913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:30.418313', 'step': 913, 'epoch': 1} {'type': 'loss', 'content': 0.11349017918109894, 'timestamp': '2025-09-10 02:33:30.420287', 'step': 914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:33:30.473720', 'step': 914, 'epoch': 1} {'type': 'loss', 'content': 0.23448318243026733, 'timestamp': '2025-09-10 02:33:30.475656', 'step': 915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:30.529820', 'step': 915, 'epoch': 1} {'type': 'loss', 'content': 0.18858422338962555, 'timestamp': '2025-09-10 02:33:30.535345', 'step': 916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:30.587460', 'step': 916, 'epoch': 1} {'type': 'loss', 'content': 0.29812490940093994, 'timestamp': '2025-09-10 02:33:30.589524', 'step': 917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:30.642572', 'step': 917, 'epoch': 1} {'type': 'loss', 'content': 0.17137764394283295, 'timestamp': '2025-09-10 02:33:30.644569', 'step': 918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:30.697825', 'step': 918, 'epoch': 1} {'type': 'loss', 'content': 0.323367714881897, 'timestamp': '2025-09-10 02:33:30.699750', 'step': 919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:30.752545', 'step': 919, 'epoch': 1} {'type': 'loss', 'content': 0.131873220205307, 'timestamp': '2025-09-10 02:33:30.758329', 'step': 920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:30.810602', 'step': 920, 'epoch': 1} {'type': 'loss', 'content': 0.26032575964927673, 'timestamp': '2025-09-10 02:33:30.812767', 'step': 921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:30.866647', 'step': 921, 'epoch': 1} {'type': 'loss', 'content': 0.1657908856868744, 'timestamp': '2025-09-10 02:33:30.868533', 'step': 922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:30.921101', 'step': 922, 'epoch': 1} {'type': 'loss', 'content': 0.20349951088428497, 'timestamp': '2025-09-10 02:33:30.922945', 'step': 923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:30.974973', 'step': 923, 'epoch': 1} {'type': 'loss', 'content': 0.272433340549469, 'timestamp': '2025-09-10 02:33:30.980648', 'step': 924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:31.033438', 'step': 924, 'epoch': 1} {'type': 'loss', 'content': 0.14067257940769196, 'timestamp': '2025-09-10 02:33:31.036549', 'step': 925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:31.092637', 'step': 925, 'epoch': 1} {'type': 'loss', 'content': 0.2566711902618408, 'timestamp': '2025-09-10 02:33:31.094647', 'step': 926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:31.147762', 'step': 926, 'epoch': 1} {'type': 'loss', 'content': 0.28346502780914307, 'timestamp': '2025-09-10 02:33:31.149633', 'step': 927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:31.202329', 'step': 927, 'epoch': 1} {'type': 'loss', 'content': 0.16585895419120789, 'timestamp': '2025-09-10 02:33:31.208315', 'step': 928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:31.261272', 'step': 928, 'epoch': 1} {'type': 'loss', 'content': 0.15045976638793945, 'timestamp': '2025-09-10 02:33:31.263186', 'step': 929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:31.315670', 'step': 929, 'epoch': 1} {'type': 'loss', 'content': 0.17924387753009796, 'timestamp': '2025-09-10 02:33:31.317789', 'step': 930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:31.372165', 'step': 930, 'epoch': 1} {'type': 'loss', 'content': 0.17225374281406403, 'timestamp': '2025-09-10 02:33:31.374063', 'step': 931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:31.426840', 'step': 931, 'epoch': 1} {'type': 'loss', 'content': 0.32708895206451416, 'timestamp': '2025-09-10 02:33:31.432476', 'step': 932, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:33:44.105864', 'step': 932, 'epoch': 1} {'type': 'pplx', 'content': 9249.231310710225, 'timestamp': '2025-09-10 02:33:44.108680', 'step': 932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:44.163409', 'step': 932, 'epoch': 1} {'type': 'loss', 'content': 0.11876490712165833, 'timestamp': '2025-09-10 02:33:44.165360', 'step': 933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:44.221645', 'step': 933, 'epoch': 1} {'type': 'loss', 'content': 0.13823802769184113, 'timestamp': '2025-09-10 02:33:44.223492', 'step': 934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:44.278415', 'step': 934, 'epoch': 1} {'type': 'loss', 'content': 0.26555031538009644, 'timestamp': '2025-09-10 02:33:44.280345', 'step': 935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:44.340590', 'step': 935, 'epoch': 1} {'type': 'loss', 'content': 0.13843834400177002, 'timestamp': '2025-09-10 02:33:44.346736', 'step': 936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:44.407025', 'step': 936, 'epoch': 1} {'type': 'loss', 'content': 0.23714999854564667, 'timestamp': '2025-09-10 02:33:44.408933', 'step': 937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:44.463693', 'step': 937, 'epoch': 1} {'type': 'loss', 'content': 0.28406548500061035, 'timestamp': '2025-09-10 02:33:44.465294', 'step': 938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:33:44.520724', 'step': 938, 'epoch': 1} {'type': 'loss', 'content': 0.23665496706962585, 'timestamp': '2025-09-10 02:33:44.522698', 'step': 939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:44.577595', 'step': 939, 'epoch': 1} {'type': 'loss', 'content': 0.22944827377796173, 'timestamp': '2025-09-10 02:33:44.583995', 'step': 940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:44.643989', 'step': 940, 'epoch': 1} {'type': 'loss', 'content': 0.1527492105960846, 'timestamp': '2025-09-10 02:33:44.645916', 'step': 941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:44.702724', 'step': 941, 'epoch': 1} {'type': 'loss', 'content': 0.24018356204032898, 'timestamp': '2025-09-10 02:33:44.704345', 'step': 942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:44.759759', 'step': 942, 'epoch': 1} {'type': 'loss', 'content': 0.217897430062294, 'timestamp': '2025-09-10 02:33:44.761619', 'step': 943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:44.817726', 'step': 943, 'epoch': 1} {'type': 'loss', 'content': 0.13182079792022705, 'timestamp': '2025-09-10 02:33:44.824125', 'step': 944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:44.878956', 'step': 944, 'epoch': 1} {'type': 'loss', 'content': 0.19500316679477692, 'timestamp': '2025-09-10 02:33:44.880960', 'step': 945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:44.935544', 'step': 945, 'epoch': 1} {'type': 'loss', 'content': 0.11362320929765701, 'timestamp': '2025-09-10 02:33:44.937516', 'step': 946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:44.992174', 'step': 946, 'epoch': 1} {'type': 'loss', 'content': 0.2332959920167923, 'timestamp': '2025-09-10 02:33:44.995410', 'step': 947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:45.053462', 'step': 947, 'epoch': 1} {'type': 'loss', 'content': 0.2314046174287796, 'timestamp': '2025-09-10 02:33:45.059824', 'step': 948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:45.117311', 'step': 948, 'epoch': 1} {'type': 'loss', 'content': 0.21372421085834503, 'timestamp': '2025-09-10 02:33:45.119270', 'step': 949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:45.174939', 'step': 949, 'epoch': 1} {'type': 'loss', 'content': 0.16858193278312683, 'timestamp': '2025-09-10 02:33:45.177016', 'step': 950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:45.233619', 'step': 950, 'epoch': 1} {'type': 'loss', 'content': 0.18282437324523926, 'timestamp': '2025-09-10 02:33:45.235773', 'step': 951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:45.290246', 'step': 951, 'epoch': 1} {'type': 'loss', 'content': 0.19609686732292175, 'timestamp': '2025-09-10 02:33:45.296656', 'step': 952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:45.351892', 'step': 952, 'epoch': 1} {'type': 'loss', 'content': 0.1080770269036293, 'timestamp': '2025-09-10 02:33:45.354006', 'step': 953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:45.409113', 'step': 953, 'epoch': 1} {'type': 'loss', 'content': 0.14018388092517853, 'timestamp': '2025-09-10 02:33:45.411179', 'step': 954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:45.468461', 'step': 954, 'epoch': 1} {'type': 'loss', 'content': 0.12864075601100922, 'timestamp': '2025-09-10 02:33:45.470548', 'step': 955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:45.528349', 'step': 955, 'epoch': 1} {'type': 'loss', 'content': 0.12596747279167175, 'timestamp': '2025-09-10 02:33:45.534905', 'step': 956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:45.590872', 'step': 956, 'epoch': 1} {'type': 'loss', 'content': 0.22123649716377258, 'timestamp': '2025-09-10 02:33:45.592895', 'step': 957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:45.653463', 'step': 957, 'epoch': 1} {'type': 'loss', 'content': 0.16715434193611145, 'timestamp': '2025-09-10 02:33:45.655777', 'step': 958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:45.714058', 'step': 958, 'epoch': 1} {'type': 'loss', 'content': 0.23957175016403198, 'timestamp': '2025-09-10 02:33:45.716342', 'step': 959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:45.773914', 'step': 959, 'epoch': 1} {'type': 'loss', 'content': 0.17274202406406403, 'timestamp': '2025-09-10 02:33:45.780688', 'step': 960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:45.840205', 'step': 960, 'epoch': 1} {'type': 'loss', 'content': 0.17130538821220398, 'timestamp': '2025-09-10 02:33:45.842642', 'step': 961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:45.900999', 'step': 961, 'epoch': 1} {'type': 'loss', 'content': 0.193582683801651, 'timestamp': '2025-09-10 02:33:45.903130', 'step': 962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:45.961418', 'step': 962, 'epoch': 1} {'type': 'loss', 'content': 0.19546069204807281, 'timestamp': '2025-09-10 02:33:45.963482', 'step': 963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:46.020098', 'step': 963, 'epoch': 1} {'type': 'loss', 'content': 0.2719533145427704, 'timestamp': '2025-09-10 02:33:46.026686', 'step': 964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:46.084652', 'step': 964, 'epoch': 1} {'type': 'loss', 'content': 0.2104797214269638, 'timestamp': '2025-09-10 02:33:46.086744', 'step': 965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:46.145213', 'step': 965, 'epoch': 1} {'type': 'loss', 'content': 0.25072115659713745, 'timestamp': '2025-09-10 02:33:46.147237', 'step': 966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:33:46.204961', 'step': 966, 'epoch': 1} {'type': 'loss', 'content': 0.20190338790416718, 'timestamp': '2025-09-10 02:33:46.207051', 'step': 967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:46.261854', 'step': 967, 'epoch': 1} {'type': 'loss', 'content': 0.2522697448730469, 'timestamp': '2025-09-10 02:33:46.268260', 'step': 968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:46.322614', 'step': 968, 'epoch': 1} {'type': 'loss', 'content': 0.17383185029029846, 'timestamp': '2025-09-10 02:33:46.324760', 'step': 969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:46.380341', 'step': 969, 'epoch': 1} {'type': 'loss', 'content': 0.265808641910553, 'timestamp': '2025-09-10 02:33:46.382375', 'step': 970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:46.437219', 'step': 970, 'epoch': 1} {'type': 'loss', 'content': 0.1619333177804947, 'timestamp': '2025-09-10 02:33:46.439260', 'step': 971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:46.497088', 'step': 971, 'epoch': 1} {'type': 'loss', 'content': 0.2538423240184784, 'timestamp': '2025-09-10 02:33:46.503247', 'step': 972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:46.558453', 'step': 972, 'epoch': 1} {'type': 'loss', 'content': 0.1776554435491562, 'timestamp': '2025-09-10 02:33:46.560487', 'step': 973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:46.616948', 'step': 973, 'epoch': 1} {'type': 'loss', 'content': 0.15598373115062714, 'timestamp': '2025-09-10 02:33:46.619062', 'step': 974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:46.675542', 'step': 974, 'epoch': 1} {'type': 'loss', 'content': 0.2350987195968628, 'timestamp': '2025-09-10 02:33:46.677588', 'step': 975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:46.738889', 'step': 975, 'epoch': 1} {'type': 'loss', 'content': 0.1737121194601059, 'timestamp': '2025-09-10 02:33:46.745106', 'step': 976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:46.798918', 'step': 976, 'epoch': 1} {'type': 'loss', 'content': 0.18187057971954346, 'timestamp': '2025-09-10 02:33:46.800977', 'step': 977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:46.859010', 'step': 977, 'epoch': 1} {'type': 'loss', 'content': 0.2140890210866928, 'timestamp': '2025-09-10 02:33:46.861161', 'step': 978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:46.916848', 'step': 978, 'epoch': 1} {'type': 'loss', 'content': 0.2686763405799866, 'timestamp': '2025-09-10 02:33:46.918875', 'step': 979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:46.974991', 'step': 979, 'epoch': 1} {'type': 'loss', 'content': 0.20225335657596588, 'timestamp': '2025-09-10 02:33:46.981050', 'step': 980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:47.038057', 'step': 980, 'epoch': 1} {'type': 'loss', 'content': 0.14123332500457764, 'timestamp': '2025-09-10 02:33:47.040017', 'step': 981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:47.095720', 'step': 981, 'epoch': 1} {'type': 'loss', 'content': 0.13986827433109283, 'timestamp': '2025-09-10 02:33:47.097758', 'step': 982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:47.156948', 'step': 982, 'epoch': 1} {'type': 'loss', 'content': 0.13207870721817017, 'timestamp': '2025-09-10 02:33:47.158971', 'step': 983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:47.212218', 'step': 983, 'epoch': 1} {'type': 'loss', 'content': 0.3553910255432129, 'timestamp': '2025-09-10 02:33:47.218158', 'step': 984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:47.271748', 'step': 984, 'epoch': 1} {'type': 'loss', 'content': 0.17018461227416992, 'timestamp': '2025-09-10 02:33:47.273851', 'step': 985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:47.331451', 'step': 985, 'epoch': 1} {'type': 'loss', 'content': 0.0875466838479042, 'timestamp': '2025-09-10 02:33:47.333493', 'step': 986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:47.390762', 'step': 986, 'epoch': 1} {'type': 'loss', 'content': 0.1586841195821762, 'timestamp': '2025-09-10 02:33:47.393123', 'step': 987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:47.448579', 'step': 987, 'epoch': 1} {'type': 'loss', 'content': 0.2994515001773834, 'timestamp': '2025-09-10 02:33:47.455108', 'step': 988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:47.510292', 'step': 988, 'epoch': 1} {'type': 'loss', 'content': 0.25306305289268494, 'timestamp': '2025-09-10 02:33:47.512487', 'step': 989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:47.567177', 'step': 989, 'epoch': 1} {'type': 'loss', 'content': 0.1643698364496231, 'timestamp': '2025-09-10 02:33:47.569232', 'step': 990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:47.623886', 'step': 990, 'epoch': 1} {'type': 'loss', 'content': 0.12199683487415314, 'timestamp': '2025-09-10 02:33:47.625905', 'step': 991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:47.680152', 'step': 991, 'epoch': 1} {'type': 'loss', 'content': 0.1878775805234909, 'timestamp': '2025-09-10 02:33:47.686433', 'step': 992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:47.739945', 'step': 992, 'epoch': 1} {'type': 'loss', 'content': 0.18936781585216522, 'timestamp': '2025-09-10 02:33:47.741969', 'step': 993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:47.796593', 'step': 993, 'epoch': 1} {'type': 'loss', 'content': 0.23409326374530792, 'timestamp': '2025-09-10 02:33:47.799396', 'step': 994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:47.855591', 'step': 994, 'epoch': 1} {'type': 'loss', 'content': 0.16703106462955475, 'timestamp': '2025-09-10 02:33:47.857564', 'step': 995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:47.910898', 'step': 995, 'epoch': 1} {'type': 'loss', 'content': 0.2105536162853241, 'timestamp': '2025-09-10 02:33:47.917140', 'step': 996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:47.970682', 'step': 996, 'epoch': 1} {'type': 'loss', 'content': 0.17847435176372528, 'timestamp': '2025-09-10 02:33:47.972667', 'step': 997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:48.024643', 'step': 997, 'epoch': 1} {'type': 'loss', 'content': 0.20319151878356934, 'timestamp': '2025-09-10 02:33:48.026796', 'step': 998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:48.079604', 'step': 998, 'epoch': 1} {'type': 'loss', 'content': 0.18898871541023254, 'timestamp': '2025-09-10 02:33:48.081671', 'step': 999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:48.135402', 'step': 999, 'epoch': 1} {'type': 'loss', 'content': 0.13685773313045502, 'timestamp': '2025-09-10 02:33:48.141220', 'step': 1000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 1000', 'timestamp': '2025-09-10 02:33:48.596173', 'step': 1000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:48.654671', 'step': 1000, 'epoch': 1} {'type': 'loss', 'content': 0.20991846919059753, 'timestamp': '2025-09-10 02:33:48.656623', 'step': 1001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:48.711073', 'step': 1001, 'epoch': 1} {'type': 'loss', 'content': 0.21368969976902008, 'timestamp': '2025-09-10 02:33:48.713262', 'step': 1002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:48.766571', 'step': 1002, 'epoch': 1} {'type': 'loss', 'content': 0.301998496055603, 'timestamp': '2025-09-10 02:33:48.768785', 'step': 1003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:48.820696', 'step': 1003, 'epoch': 1} {'type': 'loss', 'content': 0.19542887806892395, 'timestamp': '2025-09-10 02:33:48.826649', 'step': 1004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:48.878647', 'step': 1004, 'epoch': 1} {'type': 'loss', 'content': 0.2047732025384903, 'timestamp': '2025-09-10 02:33:48.880727', 'step': 1005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:48.933623', 'step': 1005, 'epoch': 1} {'type': 'loss', 'content': 0.1796729862689972, 'timestamp': '2025-09-10 02:33:48.935635', 'step': 1006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:48.988700', 'step': 1006, 'epoch': 1} {'type': 'loss', 'content': 0.20734526216983795, 'timestamp': '2025-09-10 02:33:48.990641', 'step': 1007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:49.043000', 'step': 1007, 'epoch': 1} {'type': 'loss', 'content': 0.15215907990932465, 'timestamp': '2025-09-10 02:33:49.048893', 'step': 1008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:49.101482', 'step': 1008, 'epoch': 1} {'type': 'loss', 'content': 0.23178987205028534, 'timestamp': '2025-09-10 02:33:49.103503', 'step': 1009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:49.155885', 'step': 1009, 'epoch': 1} {'type': 'loss', 'content': 0.1298394650220871, 'timestamp': '2025-09-10 02:33:49.157874', 'step': 1010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:49.213073', 'step': 1010, 'epoch': 1} {'type': 'loss', 'content': 0.21567557752132416, 'timestamp': '2025-09-10 02:33:49.215103', 'step': 1011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:49.268658', 'step': 1011, 'epoch': 1} {'type': 'loss', 'content': 0.1861243098974228, 'timestamp': '2025-09-10 02:33:49.274641', 'step': 1012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:49.326743', 'step': 1012, 'epoch': 1} {'type': 'loss', 'content': 0.21040230989456177, 'timestamp': '2025-09-10 02:33:49.328727', 'step': 1013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:49.381628', 'step': 1013, 'epoch': 1} {'type': 'loss', 'content': 0.20724181830883026, 'timestamp': '2025-09-10 02:33:49.383556', 'step': 1014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:49.436021', 'step': 1014, 'epoch': 1} {'type': 'loss', 'content': 0.13653084635734558, 'timestamp': '2025-09-10 02:33:49.438087', 'step': 1015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:49.490918', 'step': 1015, 'epoch': 1} {'type': 'loss', 'content': 0.3019977807998657, 'timestamp': '2025-09-10 02:33:49.496875', 'step': 1016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:49.549163', 'step': 1016, 'epoch': 1} {'type': 'loss', 'content': 0.1548304408788681, 'timestamp': '2025-09-10 02:33:49.551128', 'step': 1017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:49.604192', 'step': 1017, 'epoch': 1} {'type': 'loss', 'content': 0.12929001450538635, 'timestamp': '2025-09-10 02:33:49.606204', 'step': 1018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:49.659334', 'step': 1018, 'epoch': 1} {'type': 'loss', 'content': 0.15554623305797577, 'timestamp': '2025-09-10 02:33:49.661262', 'step': 1019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:49.714757', 'step': 1019, 'epoch': 1} {'type': 'loss', 'content': 0.2044810801744461, 'timestamp': '2025-09-10 02:33:49.720696', 'step': 1020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:49.772698', 'step': 1020, 'epoch': 1} {'type': 'loss', 'content': 0.1939009577035904, 'timestamp': '2025-09-10 02:33:49.774589', 'step': 1021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:49.827118', 'step': 1021, 'epoch': 1} {'type': 'loss', 'content': 0.2785954177379608, 'timestamp': '2025-09-10 02:33:49.829142', 'step': 1022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:49.881542', 'step': 1022, 'epoch': 1} {'type': 'loss', 'content': 0.18791840970516205, 'timestamp': '2025-09-10 02:33:49.883501', 'step': 1023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:49.935807', 'step': 1023, 'epoch': 1} {'type': 'loss', 'content': 0.17253561317920685, 'timestamp': '2025-09-10 02:33:49.941671', 'step': 1024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:49.993571', 'step': 1024, 'epoch': 1} {'type': 'loss', 'content': 0.12645463645458221, 'timestamp': '2025-09-10 02:33:49.995527', 'step': 1025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:50.048718', 'step': 1025, 'epoch': 1} {'type': 'loss', 'content': 0.2238384187221527, 'timestamp': '2025-09-10 02:33:50.050709', 'step': 1026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:50.104480', 'step': 1026, 'epoch': 1} {'type': 'loss', 'content': 0.154274582862854, 'timestamp': '2025-09-10 02:33:50.106595', 'step': 1027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:50.161000', 'step': 1027, 'epoch': 1} {'type': 'loss', 'content': 0.25815993547439575, 'timestamp': '2025-09-10 02:33:50.167058', 'step': 1028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:50.220253', 'step': 1028, 'epoch': 1} {'type': 'loss', 'content': 0.2522408068180084, 'timestamp': '2025-09-10 02:33:50.222253', 'step': 1029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:33:50.275204', 'step': 1029, 'epoch': 1} {'type': 'loss', 'content': 0.15564027428627014, 'timestamp': '2025-09-10 02:33:50.277359', 'step': 1030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:50.329751', 'step': 1030, 'epoch': 1} {'type': 'loss', 'content': 0.3174448013305664, 'timestamp': '2025-09-10 02:33:50.331846', 'step': 1031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:50.388544', 'step': 1031, 'epoch': 1} {'type': 'loss', 'content': 0.2078564465045929, 'timestamp': '2025-09-10 02:33:50.394392', 'step': 1032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:50.446352', 'step': 1032, 'epoch': 1} {'type': 'loss', 'content': 0.27451562881469727, 'timestamp': '2025-09-10 02:33:50.448566', 'step': 1033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:50.500882', 'step': 1033, 'epoch': 1} {'type': 'loss', 'content': 0.14976155757904053, 'timestamp': '2025-09-10 02:33:50.502930', 'step': 1034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:50.555290', 'step': 1034, 'epoch': 1} {'type': 'loss', 'content': 0.3096054196357727, 'timestamp': '2025-09-10 02:33:50.557345', 'step': 1035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:50.609905', 'step': 1035, 'epoch': 1} {'type': 'loss', 'content': 0.2584335207939148, 'timestamp': '2025-09-10 02:33:50.615814', 'step': 1036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:50.671073', 'step': 1036, 'epoch': 1} {'type': 'loss', 'content': 0.21898141503334045, 'timestamp': '2025-09-10 02:33:50.673397', 'step': 1037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:50.729883', 'step': 1037, 'epoch': 1} {'type': 'loss', 'content': 0.16371634602546692, 'timestamp': '2025-09-10 02:33:50.732101', 'step': 1038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:50.785789', 'step': 1038, 'epoch': 1} {'type': 'loss', 'content': 0.12592355906963348, 'timestamp': '2025-09-10 02:33:50.787732', 'step': 1039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:33:50.843408', 'step': 1039, 'epoch': 1} {'type': 'loss', 'content': 0.20412850379943848, 'timestamp': '2025-09-10 02:33:50.849314', 'step': 1040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:50.902703', 'step': 1040, 'epoch': 1} {'type': 'loss', 'content': 0.2429477423429489, 'timestamp': '2025-09-10 02:33:50.904631', 'step': 1041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:50.956961', 'step': 1041, 'epoch': 1} {'type': 'loss', 'content': 0.24402646720409393, 'timestamp': '2025-09-10 02:33:50.958971', 'step': 1042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:51.014333', 'step': 1042, 'epoch': 1} {'type': 'loss', 'content': 0.16332033276557922, 'timestamp': '2025-09-10 02:33:51.016324', 'step': 1043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:51.072493', 'step': 1043, 'epoch': 1} {'type': 'loss', 'content': 0.22591015696525574, 'timestamp': '2025-09-10 02:33:51.078545', 'step': 1044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:51.133916', 'step': 1044, 'epoch': 1} {'type': 'loss', 'content': 0.14253106713294983, 'timestamp': '2025-09-10 02:33:51.136270', 'step': 1045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:51.190846', 'step': 1045, 'epoch': 1} {'type': 'loss', 'content': 0.14994457364082336, 'timestamp': '2025-09-10 02:33:51.193062', 'step': 1046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:51.247080', 'step': 1046, 'epoch': 1} {'type': 'loss', 'content': 0.2109394669532776, 'timestamp': '2025-09-10 02:33:51.249125', 'step': 1047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:51.303191', 'step': 1047, 'epoch': 1} {'type': 'loss', 'content': 0.23616567254066467, 'timestamp': '2025-09-10 02:33:51.309597', 'step': 1048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:51.363809', 'step': 1048, 'epoch': 1} {'type': 'loss', 'content': 0.24104741215705872, 'timestamp': '2025-09-10 02:33:51.365740', 'step': 1049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:51.419458', 'step': 1049, 'epoch': 1} {'type': 'loss', 'content': 0.1785721778869629, 'timestamp': '2025-09-10 02:33:51.421688', 'step': 1050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:51.477010', 'step': 1050, 'epoch': 1} {'type': 'loss', 'content': 0.1336807757616043, 'timestamp': '2025-09-10 02:33:51.479061', 'step': 1051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:51.533457', 'step': 1051, 'epoch': 1} {'type': 'loss', 'content': 0.15495465695858002, 'timestamp': '2025-09-10 02:33:51.539493', 'step': 1052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:51.593410', 'step': 1052, 'epoch': 1} {'type': 'loss', 'content': 0.18913805484771729, 'timestamp': '2025-09-10 02:33:51.595385', 'step': 1053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:51.650942', 'step': 1053, 'epoch': 1} {'type': 'loss', 'content': 0.1591154783964157, 'timestamp': '2025-09-10 02:33:51.653101', 'step': 1054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:51.706914', 'step': 1054, 'epoch': 1} {'type': 'loss', 'content': 0.1280459612607956, 'timestamp': '2025-09-10 02:33:51.709017', 'step': 1055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:33:51.762847', 'step': 1055, 'epoch': 1} {'type': 'loss', 'content': 0.21499529480934143, 'timestamp': '2025-09-10 02:33:51.769009', 'step': 1056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:51.821996', 'step': 1056, 'epoch': 1} {'type': 'loss', 'content': 0.2839573919773102, 'timestamp': '2025-09-10 02:33:51.824058', 'step': 1057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:51.879309', 'step': 1057, 'epoch': 1} {'type': 'loss', 'content': 0.17412501573562622, 'timestamp': '2025-09-10 02:33:51.881259', 'step': 1058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:51.934989', 'step': 1058, 'epoch': 1} {'type': 'loss', 'content': 0.21390444040298462, 'timestamp': '2025-09-10 02:33:51.937114', 'step': 1059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:51.989735', 'step': 1059, 'epoch': 1} {'type': 'loss', 'content': 0.26019394397735596, 'timestamp': '2025-09-10 02:33:51.995856', 'step': 1060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:52.048794', 'step': 1060, 'epoch': 1} {'type': 'loss', 'content': 0.32494354248046875, 'timestamp': '2025-09-10 02:33:52.050852', 'step': 1061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:52.106478', 'step': 1061, 'epoch': 1} {'type': 'loss', 'content': 0.1463298499584198, 'timestamp': '2025-09-10 02:33:52.108480', 'step': 1062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:52.161705', 'step': 1062, 'epoch': 1} {'type': 'loss', 'content': 0.20552794635295868, 'timestamp': '2025-09-10 02:33:52.163698', 'step': 1063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:52.216309', 'step': 1063, 'epoch': 1} {'type': 'loss', 'content': 0.16416634619235992, 'timestamp': '2025-09-10 02:33:52.222244', 'step': 1064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:52.276799', 'step': 1064, 'epoch': 1} {'type': 'loss', 'content': 0.19897492229938507, 'timestamp': '2025-09-10 02:33:52.278807', 'step': 1065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:52.331648', 'step': 1065, 'epoch': 1} {'type': 'loss', 'content': 0.20432455837726593, 'timestamp': '2025-09-10 02:33:52.333616', 'step': 1066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:52.385883', 'step': 1066, 'epoch': 1} {'type': 'loss', 'content': 0.23562383651733398, 'timestamp': '2025-09-10 02:33:52.387836', 'step': 1067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:52.440335', 'step': 1067, 'epoch': 1} {'type': 'loss', 'content': 0.17050997912883759, 'timestamp': '2025-09-10 02:33:52.446221', 'step': 1068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:52.498002', 'step': 1068, 'epoch': 1} {'type': 'loss', 'content': 0.15417954325675964, 'timestamp': '2025-09-10 02:33:52.499952', 'step': 1069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:52.551761', 'step': 1069, 'epoch': 1} {'type': 'loss', 'content': 0.19931195676326752, 'timestamp': '2025-09-10 02:33:52.553803', 'step': 1070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:52.607070', 'step': 1070, 'epoch': 1} {'type': 'loss', 'content': 0.2129957526922226, 'timestamp': '2025-09-10 02:33:52.609074', 'step': 1071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:52.663007', 'step': 1071, 'epoch': 1} {'type': 'loss', 'content': 0.20687109231948853, 'timestamp': '2025-09-10 02:33:52.669060', 'step': 1072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:52.721552', 'step': 1072, 'epoch': 1} {'type': 'loss', 'content': 0.18647123873233795, 'timestamp': '2025-09-10 02:33:52.723635', 'step': 1073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:52.776715', 'step': 1073, 'epoch': 1} {'type': 'loss', 'content': 0.19734503328800201, 'timestamp': '2025-09-10 02:33:52.778888', 'step': 1074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:52.831982', 'step': 1074, 'epoch': 1} {'type': 'loss', 'content': 0.1710953414440155, 'timestamp': '2025-09-10 02:33:52.834138', 'step': 1075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:52.887818', 'step': 1075, 'epoch': 1} {'type': 'loss', 'content': 0.12907674908638, 'timestamp': '2025-09-10 02:33:52.893675', 'step': 1076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:52.947288', 'step': 1076, 'epoch': 1} {'type': 'loss', 'content': 0.2449091672897339, 'timestamp': '2025-09-10 02:33:52.949317', 'step': 1077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:53.002132', 'step': 1077, 'epoch': 1} {'type': 'loss', 'content': 0.18067260086536407, 'timestamp': '2025-09-10 02:33:53.004348', 'step': 1078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:53.057297', 'step': 1078, 'epoch': 1} {'type': 'loss', 'content': 0.1267329305410385, 'timestamp': '2025-09-10 02:33:53.059490', 'step': 1079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:53.111706', 'step': 1079, 'epoch': 1} {'type': 'loss', 'content': 0.254726767539978, 'timestamp': '2025-09-10 02:33:53.117653', 'step': 1080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:53.170122', 'step': 1080, 'epoch': 1} {'type': 'loss', 'content': 0.14634713530540466, 'timestamp': '2025-09-10 02:33:53.172163', 'step': 1081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:53.225076', 'step': 1081, 'epoch': 1} {'type': 'loss', 'content': 0.1366223394870758, 'timestamp': '2025-09-10 02:33:53.227147', 'step': 1082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:53.280129', 'step': 1082, 'epoch': 1} {'type': 'loss', 'content': 0.27629122138023376, 'timestamp': '2025-09-10 02:33:53.282163', 'step': 1083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:53.334965', 'step': 1083, 'epoch': 1} {'type': 'loss', 'content': 0.10051210224628448, 'timestamp': '2025-09-10 02:33:53.341144', 'step': 1084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:53.394060', 'step': 1084, 'epoch': 1} {'type': 'loss', 'content': 0.22792716324329376, 'timestamp': '2025-09-10 02:33:53.396086', 'step': 1085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:53.449390', 'step': 1085, 'epoch': 1} {'type': 'loss', 'content': 0.19263474643230438, 'timestamp': '2025-09-10 02:33:53.451467', 'step': 1086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:53.505411', 'step': 1086, 'epoch': 1} {'type': 'loss', 'content': 0.1828005015850067, 'timestamp': '2025-09-10 02:33:53.507500', 'step': 1087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:53.561419', 'step': 1087, 'epoch': 1} {'type': 'loss', 'content': 0.23950830101966858, 'timestamp': '2025-09-10 02:33:53.567477', 'step': 1088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:53.620241', 'step': 1088, 'epoch': 1} {'type': 'loss', 'content': 0.11594150215387344, 'timestamp': '2025-09-10 02:33:53.622310', 'step': 1089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:53.675247', 'step': 1089, 'epoch': 1} {'type': 'loss', 'content': 0.15479972958564758, 'timestamp': '2025-09-10 02:33:53.677048', 'step': 1090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:53.729909', 'step': 1090, 'epoch': 1} {'type': 'loss', 'content': 0.12343817204236984, 'timestamp': '2025-09-10 02:33:53.731933', 'step': 1091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:33:53.786698', 'step': 1091, 'epoch': 1} {'type': 'loss', 'content': 0.1619865596294403, 'timestamp': '2025-09-10 02:33:53.792662', 'step': 1092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:53.845095', 'step': 1092, 'epoch': 1} {'type': 'loss', 'content': 0.1796685755252838, 'timestamp': '2025-09-10 02:33:53.846850', 'step': 1093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:53.901087', 'step': 1093, 'epoch': 1} {'type': 'loss', 'content': 0.14163927733898163, 'timestamp': '2025-09-10 02:33:53.903009', 'step': 1094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:53.957896', 'step': 1094, 'epoch': 1} {'type': 'loss', 'content': 0.1945696324110031, 'timestamp': '2025-09-10 02:33:53.959694', 'step': 1095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:54.014685', 'step': 1095, 'epoch': 1} {'type': 'loss', 'content': 0.16426976025104523, 'timestamp': '2025-09-10 02:33:54.021101', 'step': 1096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:54.077057', 'step': 1096, 'epoch': 1} {'type': 'loss', 'content': 0.23587560653686523, 'timestamp': '2025-09-10 02:33:54.079312', 'step': 1097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:54.133530', 'step': 1097, 'epoch': 1} {'type': 'loss', 'content': 0.20895102620124817, 'timestamp': '2025-09-10 02:33:54.135658', 'step': 1098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:54.189813', 'step': 1098, 'epoch': 1} {'type': 'loss', 'content': 0.18075186014175415, 'timestamp': '2025-09-10 02:33:54.192077', 'step': 1099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:54.245334', 'step': 1099, 'epoch': 1} {'type': 'loss', 'content': 0.15316233038902283, 'timestamp': '2025-09-10 02:33:54.251591', 'step': 1100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:54.305229', 'step': 1100, 'epoch': 1} {'type': 'loss', 'content': 0.12598688900470734, 'timestamp': '2025-09-10 02:33:54.307058', 'step': 1101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:54.360531', 'step': 1101, 'epoch': 1} {'type': 'loss', 'content': 0.14344121515750885, 'timestamp': '2025-09-10 02:33:54.362311', 'step': 1102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:54.415570', 'step': 1102, 'epoch': 1} {'type': 'loss', 'content': 0.18436548113822937, 'timestamp': '2025-09-10 02:33:54.417474', 'step': 1103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:54.470720', 'step': 1103, 'epoch': 1} {'type': 'loss', 'content': 0.13673320412635803, 'timestamp': '2025-09-10 02:33:54.476749', 'step': 1104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:54.529944', 'step': 1104, 'epoch': 1} {'type': 'loss', 'content': 0.25825193524360657, 'timestamp': '2025-09-10 02:33:54.532011', 'step': 1105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:54.585179', 'step': 1105, 'epoch': 1} {'type': 'loss', 'content': 0.168921560049057, 'timestamp': '2025-09-10 02:33:54.587153', 'step': 1106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:54.640718', 'step': 1106, 'epoch': 1} {'type': 'loss', 'content': 0.21905158460140228, 'timestamp': '2025-09-10 02:33:54.642894', 'step': 1107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:54.696597', 'step': 1107, 'epoch': 1} {'type': 'loss', 'content': 0.22767119109630585, 'timestamp': '2025-09-10 02:33:54.702833', 'step': 1108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:54.755496', 'step': 1108, 'epoch': 1} {'type': 'loss', 'content': 0.16288883984088898, 'timestamp': '2025-09-10 02:33:54.757640', 'step': 1109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:54.810989', 'step': 1109, 'epoch': 1} {'type': 'loss', 'content': 0.16567641496658325, 'timestamp': '2025-09-10 02:33:54.812996', 'step': 1110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:33:54.866690', 'step': 1110, 'epoch': 1} {'type': 'loss', 'content': 0.16093145310878754, 'timestamp': '2025-09-10 02:33:54.868492', 'step': 1111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:54.921530', 'step': 1111, 'epoch': 1} {'type': 'loss', 'content': 0.22907938063144684, 'timestamp': '2025-09-10 02:33:54.927370', 'step': 1112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:54.984786', 'step': 1112, 'epoch': 1} {'type': 'loss', 'content': 0.20364071428775787, 'timestamp': '2025-09-10 02:33:54.987099', 'step': 1113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:55.045518', 'step': 1113, 'epoch': 1} {'type': 'loss', 'content': 0.1592254638671875, 'timestamp': '2025-09-10 02:33:55.048668', 'step': 1114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:55.103299', 'step': 1114, 'epoch': 1} {'type': 'loss', 'content': 0.13476811349391937, 'timestamp': '2025-09-10 02:33:55.105476', 'step': 1115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:55.158771', 'step': 1115, 'epoch': 1} {'type': 'loss', 'content': 0.15169639885425568, 'timestamp': '2025-09-10 02:33:55.164856', 'step': 1116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:55.217173', 'step': 1116, 'epoch': 1} {'type': 'loss', 'content': 0.20642533898353577, 'timestamp': '2025-09-10 02:33:55.219636', 'step': 1117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:55.273146', 'step': 1117, 'epoch': 1} {'type': 'loss', 'content': 0.23647218942642212, 'timestamp': '2025-09-10 02:33:55.275056', 'step': 1118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:55.329033', 'step': 1118, 'epoch': 1} {'type': 'loss', 'content': 0.19545303285121918, 'timestamp': '2025-09-10 02:33:55.330915', 'step': 1119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:55.386540', 'step': 1119, 'epoch': 1} {'type': 'loss', 'content': 0.1779831349849701, 'timestamp': '2025-09-10 02:33:55.392617', 'step': 1120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:55.446392', 'step': 1120, 'epoch': 1} {'type': 'loss', 'content': 0.24106191098690033, 'timestamp': '2025-09-10 02:33:55.450018', 'step': 1121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:55.503465', 'step': 1121, 'epoch': 1} {'type': 'loss', 'content': 0.21762390434741974, 'timestamp': '2025-09-10 02:33:55.509626', 'step': 1122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:55.563422', 'step': 1122, 'epoch': 1} {'type': 'loss', 'content': 0.16547416150569916, 'timestamp': '2025-09-10 02:33:55.565618', 'step': 1123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:55.619217', 'step': 1123, 'epoch': 1} {'type': 'loss', 'content': 0.201889768242836, 'timestamp': '2025-09-10 02:33:55.625341', 'step': 1124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:55.677660', 'step': 1124, 'epoch': 1} {'type': 'loss', 'content': 0.1638774573802948, 'timestamp': '2025-09-10 02:33:55.680356', 'step': 1125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:55.742070', 'step': 1125, 'epoch': 1} {'type': 'loss', 'content': 0.1412767618894577, 'timestamp': '2025-09-10 02:33:55.744046', 'step': 1126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:33:55.803644', 'step': 1126, 'epoch': 1} {'type': 'loss', 'content': 0.21980373561382294, 'timestamp': '2025-09-10 02:33:55.809741', 'step': 1127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:55.864742', 'step': 1127, 'epoch': 1} {'type': 'loss', 'content': 0.22274582087993622, 'timestamp': '2025-09-10 02:33:55.870654', 'step': 1128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:55.924034', 'step': 1128, 'epoch': 1} {'type': 'loss', 'content': 0.12602896988391876, 'timestamp': '2025-09-10 02:33:55.926059', 'step': 1129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:55.979460', 'step': 1129, 'epoch': 1} {'type': 'loss', 'content': 0.1966758370399475, 'timestamp': '2025-09-10 02:33:55.981637', 'step': 1130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:56.035214', 'step': 1130, 'epoch': 1} {'type': 'loss', 'content': 0.2629774212837219, 'timestamp': '2025-09-10 02:33:56.037626', 'step': 1131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:56.093616', 'step': 1131, 'epoch': 1} {'type': 'loss', 'content': 0.14672988653182983, 'timestamp': '2025-09-10 02:33:56.099955', 'step': 1132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:56.156325', 'step': 1132, 'epoch': 1} {'type': 'loss', 'content': 0.2609669268131256, 'timestamp': '2025-09-10 02:33:56.158424', 'step': 1133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:56.214228', 'step': 1133, 'epoch': 1} {'type': 'loss', 'content': 0.15657278895378113, 'timestamp': '2025-09-10 02:33:56.216015', 'step': 1134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:56.271755', 'step': 1134, 'epoch': 1} {'type': 'loss', 'content': 0.3505898118019104, 'timestamp': '2025-09-10 02:33:56.273789', 'step': 1135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:56.330182', 'step': 1135, 'epoch': 1} {'type': 'loss', 'content': 0.2733868360519409, 'timestamp': '2025-09-10 02:33:56.336193', 'step': 1136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:56.392814', 'step': 1136, 'epoch': 1} {'type': 'loss', 'content': 0.18526782095432281, 'timestamp': '2025-09-10 02:33:56.394623', 'step': 1137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:56.448878', 'step': 1137, 'epoch': 1} {'type': 'loss', 'content': 0.17785707116127014, 'timestamp': '2025-09-10 02:33:56.450983', 'step': 1138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:56.505020', 'step': 1138, 'epoch': 1} {'type': 'loss', 'content': 0.17603349685668945, 'timestamp': '2025-09-10 02:33:56.507035', 'step': 1139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:56.561551', 'step': 1139, 'epoch': 1} {'type': 'loss', 'content': 0.10352152585983276, 'timestamp': '2025-09-10 02:33:56.567847', 'step': 1140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:56.621562', 'step': 1140, 'epoch': 1} {'type': 'loss', 'content': 0.1852683573961258, 'timestamp': '2025-09-10 02:33:56.623713', 'step': 1141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:56.677978', 'step': 1141, 'epoch': 1} {'type': 'loss', 'content': 0.23982955515384674, 'timestamp': '2025-09-10 02:33:56.679963', 'step': 1142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:33:56.735143', 'step': 1142, 'epoch': 1} {'type': 'loss', 'content': 0.11256951093673706, 'timestamp': '2025-09-10 02:33:56.736923', 'step': 1143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:56.790756', 'step': 1143, 'epoch': 1} {'type': 'loss', 'content': 0.23078122735023499, 'timestamp': '2025-09-10 02:33:56.797080', 'step': 1144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:33:56.850915', 'step': 1144, 'epoch': 1} {'type': 'loss', 'content': 0.17421358823776245, 'timestamp': '2025-09-10 02:33:56.852733', 'step': 1145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:56.906812', 'step': 1145, 'epoch': 1} {'type': 'loss', 'content': 0.12947463989257812, 'timestamp': '2025-09-10 02:33:56.908757', 'step': 1146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:56.963522', 'step': 1146, 'epoch': 1} {'type': 'loss', 'content': 0.19391973316669464, 'timestamp': '2025-09-10 02:33:56.965918', 'step': 1147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:57.020112', 'step': 1147, 'epoch': 1} {'type': 'loss', 'content': 0.2873455882072449, 'timestamp': '2025-09-10 02:33:57.026217', 'step': 1148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:57.079094', 'step': 1148, 'epoch': 1} {'type': 'loss', 'content': 0.26546794176101685, 'timestamp': '2025-09-10 02:33:57.081212', 'step': 1149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:57.139157', 'step': 1149, 'epoch': 1} {'type': 'loss', 'content': 0.25890448689460754, 'timestamp': '2025-09-10 02:33:57.141184', 'step': 1150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:57.196536', 'step': 1150, 'epoch': 1} {'type': 'loss', 'content': 0.11575021594762802, 'timestamp': '2025-09-10 02:33:57.198362', 'step': 1151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:57.252226', 'step': 1151, 'epoch': 1} {'type': 'loss', 'content': 0.20485638082027435, 'timestamp': '2025-09-10 02:33:57.257980', 'step': 1152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:57.310793', 'step': 1152, 'epoch': 1} {'type': 'loss', 'content': 0.15336556732654572, 'timestamp': '2025-09-10 02:33:57.312905', 'step': 1153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:57.367078', 'step': 1153, 'epoch': 1} {'type': 'loss', 'content': 0.14116589725017548, 'timestamp': '2025-09-10 02:33:57.368886', 'step': 1154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:57.422623', 'step': 1154, 'epoch': 1} {'type': 'loss', 'content': 0.1705351173877716, 'timestamp': '2025-09-10 02:33:57.424631', 'step': 1155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:33:57.478758', 'step': 1155, 'epoch': 1} {'type': 'loss', 'content': 0.22041356563568115, 'timestamp': '2025-09-10 02:33:57.485198', 'step': 1156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:57.538841', 'step': 1156, 'epoch': 1} {'type': 'loss', 'content': 0.22461746633052826, 'timestamp': '2025-09-10 02:33:57.541061', 'step': 1157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:57.595831', 'step': 1157, 'epoch': 1} {'type': 'loss', 'content': 0.21684648096561432, 'timestamp': '2025-09-10 02:33:57.598111', 'step': 1158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:57.653483', 'step': 1158, 'epoch': 1} {'type': 'loss', 'content': 0.17331013083457947, 'timestamp': '2025-09-10 02:33:57.655576', 'step': 1159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:57.710226', 'step': 1159, 'epoch': 1} {'type': 'loss', 'content': 0.16394494473934174, 'timestamp': '2025-09-10 02:33:57.716268', 'step': 1160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:57.770262', 'step': 1160, 'epoch': 1} {'type': 'loss', 'content': 0.1320430189371109, 'timestamp': '2025-09-10 02:33:57.772572', 'step': 1161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:57.827679', 'step': 1161, 'epoch': 1} {'type': 'loss', 'content': 0.2607515752315521, 'timestamp': '2025-09-10 02:33:57.829956', 'step': 1162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:57.884914', 'step': 1162, 'epoch': 1} {'type': 'loss', 'content': 0.20393683016300201, 'timestamp': '2025-09-10 02:33:57.892186', 'step': 1163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:57.947686', 'step': 1163, 'epoch': 1} {'type': 'loss', 'content': 0.12244535237550735, 'timestamp': '2025-09-10 02:33:57.956054', 'step': 1164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:58.013318', 'step': 1164, 'epoch': 1} {'type': 'loss', 'content': 0.14584963023662567, 'timestamp': '2025-09-10 02:33:58.015632', 'step': 1165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:58.072468', 'step': 1165, 'epoch': 1} {'type': 'loss', 'content': 0.1550322026014328, 'timestamp': '2025-09-10 02:33:58.074242', 'step': 1166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:58.134647', 'step': 1166, 'epoch': 1} {'type': 'loss', 'content': 0.17600029706954956, 'timestamp': '2025-09-10 02:33:58.136445', 'step': 1167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:58.190277', 'step': 1167, 'epoch': 1} {'type': 'loss', 'content': 0.15263906121253967, 'timestamp': '2025-09-10 02:33:58.196347', 'step': 1168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:58.254945', 'step': 1168, 'epoch': 1} {'type': 'loss', 'content': 0.1900622397661209, 'timestamp': '2025-09-10 02:33:58.256705', 'step': 1169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:58.312136', 'step': 1169, 'epoch': 1} {'type': 'loss', 'content': 0.19281186163425446, 'timestamp': '2025-09-10 02:33:58.314480', 'step': 1170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:58.374298', 'step': 1170, 'epoch': 1} {'type': 'loss', 'content': 0.15772292017936707, 'timestamp': '2025-09-10 02:33:58.376651', 'step': 1171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:58.438588', 'step': 1171, 'epoch': 1} {'type': 'loss', 'content': 0.18719437718391418, 'timestamp': '2025-09-10 02:33:58.453596', 'step': 1172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:58.507632', 'step': 1172, 'epoch': 1} {'type': 'loss', 'content': 0.1641417294740677, 'timestamp': '2025-09-10 02:33:58.512051', 'step': 1173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:58.573863', 'step': 1173, 'epoch': 1} {'type': 'loss', 'content': 0.18605659902095795, 'timestamp': '2025-09-10 02:33:58.576067', 'step': 1174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:58.630317', 'step': 1174, 'epoch': 1} {'type': 'loss', 'content': 0.15468917787075043, 'timestamp': '2025-09-10 02:33:58.632459', 'step': 1175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:58.686377', 'step': 1175, 'epoch': 1} {'type': 'loss', 'content': 0.22505301237106323, 'timestamp': '2025-09-10 02:33:58.692803', 'step': 1176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:58.746800', 'step': 1176, 'epoch': 1} {'type': 'loss', 'content': 0.17868132889270782, 'timestamp': '2025-09-10 02:33:58.748758', 'step': 1177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:58.804249', 'step': 1177, 'epoch': 1} {'type': 'loss', 'content': 0.25941529870033264, 'timestamp': '2025-09-10 02:33:58.806561', 'step': 1178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:58.862418', 'step': 1178, 'epoch': 1} {'type': 'loss', 'content': 0.1397390365600586, 'timestamp': '2025-09-10 02:33:58.866000', 'step': 1179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:58.932425', 'step': 1179, 'epoch': 1} {'type': 'loss', 'content': 0.2130793035030365, 'timestamp': '2025-09-10 02:33:58.942331', 'step': 1180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:59.006359', 'step': 1180, 'epoch': 1} {'type': 'loss', 'content': 0.20014314353466034, 'timestamp': '2025-09-10 02:33:59.008810', 'step': 1181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:59.062829', 'step': 1181, 'epoch': 1} {'type': 'loss', 'content': 0.17664118111133575, 'timestamp': '2025-09-10 02:33:59.064897', 'step': 1182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:59.119266', 'step': 1182, 'epoch': 1} {'type': 'loss', 'content': 0.1550840437412262, 'timestamp': '2025-09-10 02:33:59.124089', 'step': 1183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:59.180073', 'step': 1183, 'epoch': 1} {'type': 'loss', 'content': 0.12380845099687576, 'timestamp': '2025-09-10 02:33:59.195337', 'step': 1184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:33:59.254507', 'step': 1184, 'epoch': 1} {'type': 'loss', 'content': 0.1440662443637848, 'timestamp': '2025-09-10 02:33:59.256527', 'step': 1185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:59.311615', 'step': 1185, 'epoch': 1} {'type': 'loss', 'content': 0.18002808094024658, 'timestamp': '2025-09-10 02:33:59.313816', 'step': 1186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:59.368475', 'step': 1186, 'epoch': 1} {'type': 'loss', 'content': 0.2461969405412674, 'timestamp': '2025-09-10 02:33:59.370666', 'step': 1187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:59.425540', 'step': 1187, 'epoch': 1} {'type': 'loss', 'content': 0.1934744268655777, 'timestamp': '2025-09-10 02:33:59.431806', 'step': 1188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:59.485357', 'step': 1188, 'epoch': 1} {'type': 'loss', 'content': 0.13905347883701324, 'timestamp': '2025-09-10 02:33:59.487453', 'step': 1189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:59.541014', 'step': 1189, 'epoch': 1} {'type': 'loss', 'content': 0.15998303890228271, 'timestamp': '2025-09-10 02:33:59.543278', 'step': 1190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:33:59.597784', 'step': 1190, 'epoch': 1} {'type': 'loss', 'content': 0.20286159217357635, 'timestamp': '2025-09-10 02:33:59.599994', 'step': 1191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:33:59.654842', 'step': 1191, 'epoch': 1} {'type': 'loss', 'content': 0.09495752304792404, 'timestamp': '2025-09-10 02:33:59.660969', 'step': 1192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:59.716063', 'step': 1192, 'epoch': 1} {'type': 'loss', 'content': 0.16349394619464874, 'timestamp': '2025-09-10 02:33:59.718408', 'step': 1193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:59.772943', 'step': 1193, 'epoch': 1} {'type': 'loss', 'content': 0.23833325505256653, 'timestamp': '2025-09-10 02:33:59.775130', 'step': 1194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:59.830891', 'step': 1194, 'epoch': 1} {'type': 'loss', 'content': 0.1974266916513443, 'timestamp': '2025-09-10 02:33:59.832966', 'step': 1195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:59.888057', 'step': 1195, 'epoch': 1} {'type': 'loss', 'content': 0.19846762716770172, 'timestamp': '2025-09-10 02:33:59.894422', 'step': 1196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:33:59.948280', 'step': 1196, 'epoch': 1} {'type': 'loss', 'content': 0.25860923528671265, 'timestamp': '2025-09-10 02:33:59.950628', 'step': 1197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:00.004799', 'step': 1197, 'epoch': 1} {'type': 'loss', 'content': 0.14303484559059143, 'timestamp': '2025-09-10 02:34:00.006859', 'step': 1198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:00.060749', 'step': 1198, 'epoch': 1} {'type': 'loss', 'content': 0.16943037509918213, 'timestamp': '2025-09-10 02:34:00.062822', 'step': 1199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:00.117828', 'step': 1199, 'epoch': 1} {'type': 'loss', 'content': 0.12071249634027481, 'timestamp': '2025-09-10 02:34:00.124215', 'step': 1200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:00.185495', 'step': 1200, 'epoch': 1} {'type': 'loss', 'content': 0.15674231946468353, 'timestamp': '2025-09-10 02:34:00.187487', 'step': 1201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:00.241804', 'step': 1201, 'epoch': 1} {'type': 'loss', 'content': 0.17699894309043884, 'timestamp': '2025-09-10 02:34:00.243760', 'step': 1202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:00.299356', 'step': 1202, 'epoch': 1} {'type': 'loss', 'content': 0.19463223218917847, 'timestamp': '2025-09-10 02:34:00.301491', 'step': 1203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:00.359941', 'step': 1203, 'epoch': 1} {'type': 'loss', 'content': 0.1898779720067978, 'timestamp': '2025-09-10 02:34:00.366915', 'step': 1204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:00.423145', 'step': 1204, 'epoch': 1} {'type': 'loss', 'content': 0.15626399219036102, 'timestamp': '2025-09-10 02:34:00.425323', 'step': 1205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:34:00.480424', 'step': 1205, 'epoch': 1} {'type': 'loss', 'content': 0.227534219622612, 'timestamp': '2025-09-10 02:34:00.482578', 'step': 1206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:00.538220', 'step': 1206, 'epoch': 1} {'type': 'loss', 'content': 0.1747010201215744, 'timestamp': '2025-09-10 02:34:00.540445', 'step': 1207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:00.595016', 'step': 1207, 'epoch': 1} {'type': 'loss', 'content': 0.17742204666137695, 'timestamp': '2025-09-10 02:34:00.601562', 'step': 1208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:00.656082', 'step': 1208, 'epoch': 1} {'type': 'loss', 'content': 0.10844826698303223, 'timestamp': '2025-09-10 02:34:00.658027', 'step': 1209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:00.713507', 'step': 1209, 'epoch': 1} {'type': 'loss', 'content': 0.22558248043060303, 'timestamp': '2025-09-10 02:34:00.715547', 'step': 1210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:00.770553', 'step': 1210, 'epoch': 1} {'type': 'loss', 'content': 0.15241944789886475, 'timestamp': '2025-09-10 02:34:00.772651', 'step': 1211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:00.827057', 'step': 1211, 'epoch': 1} {'type': 'loss', 'content': 0.2011488825082779, 'timestamp': '2025-09-10 02:34:00.833226', 'step': 1212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:00.886494', 'step': 1212, 'epoch': 1} {'type': 'loss', 'content': 0.1479550302028656, 'timestamp': '2025-09-10 02:34:00.888454', 'step': 1213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:00.942437', 'step': 1213, 'epoch': 1} {'type': 'loss', 'content': 0.13790160417556763, 'timestamp': '2025-09-10 02:34:00.944294', 'step': 1214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:00.997758', 'step': 1214, 'epoch': 1} {'type': 'loss', 'content': 0.15684403479099274, 'timestamp': '2025-09-10 02:34:01', 'step': 1215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:01.052694', 'step': 1215, 'epoch': 1} {'type': 'loss', 'content': 0.13618218898773193, 'timestamp': '2025-09-10 02:34:01.058979', 'step': 1216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:01.112179', 'step': 1216, 'epoch': 1} {'type': 'loss', 'content': 0.17270450294017792, 'timestamp': '2025-09-10 02:34:01.114298', 'step': 1217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:01.174417', 'step': 1217, 'epoch': 1} {'type': 'loss', 'content': 0.14529448747634888, 'timestamp': '2025-09-10 02:34:01.176796', 'step': 1218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:01.230079', 'step': 1218, 'epoch': 1} {'type': 'loss', 'content': 0.20104564726352692, 'timestamp': '2025-09-10 02:34:01.232328', 'step': 1219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:01.285967', 'step': 1219, 'epoch': 1} {'type': 'loss', 'content': 0.1487194001674652, 'timestamp': '2025-09-10 02:34:01.292309', 'step': 1220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:01.344668', 'step': 1220, 'epoch': 1} {'type': 'loss', 'content': 0.19081485271453857, 'timestamp': '2025-09-10 02:34:01.346833', 'step': 1221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:01.399931', 'step': 1221, 'epoch': 1} {'type': 'loss', 'content': 0.18268831074237823, 'timestamp': '2025-09-10 02:34:01.402145', 'step': 1222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:01.456046', 'step': 1222, 'epoch': 1} {'type': 'loss', 'content': 0.18311995267868042, 'timestamp': '2025-09-10 02:34:01.458365', 'step': 1223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:01.511183', 'step': 1223, 'epoch': 1} {'type': 'loss', 'content': 0.14530828595161438, 'timestamp': '2025-09-10 02:34:01.517481', 'step': 1224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:01.571979', 'step': 1224, 'epoch': 1} {'type': 'loss', 'content': 0.27193811535835266, 'timestamp': '2025-09-10 02:34:01.574221', 'step': 1225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:01.626726', 'step': 1225, 'epoch': 1} {'type': 'loss', 'content': 0.28927546739578247, 'timestamp': '2025-09-10 02:34:01.628969', 'step': 1226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:01.681523', 'step': 1226, 'epoch': 1} {'type': 'loss', 'content': 0.2397468239068985, 'timestamp': '2025-09-10 02:34:01.683690', 'step': 1227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:01.736523', 'step': 1227, 'epoch': 1} {'type': 'loss', 'content': 0.1305008977651596, 'timestamp': '2025-09-10 02:34:01.742260', 'step': 1228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:01.794670', 'step': 1228, 'epoch': 1} {'type': 'loss', 'content': 0.20439007878303528, 'timestamp': '2025-09-10 02:34:01.796927', 'step': 1229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:01.851544', 'step': 1229, 'epoch': 1} {'type': 'loss', 'content': 0.3051568269729614, 'timestamp': '2025-09-10 02:34:01.853685', 'step': 1230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:01.906449', 'step': 1230, 'epoch': 1} {'type': 'loss', 'content': 0.2390100210905075, 'timestamp': '2025-09-10 02:34:01.908525', 'step': 1231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:01.960812', 'step': 1231, 'epoch': 1} {'type': 'loss', 'content': 0.19292815029621124, 'timestamp': '2025-09-10 02:34:01.967340', 'step': 1232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:02.019918', 'step': 1232, 'epoch': 1} {'type': 'loss', 'content': 0.17782427370548248, 'timestamp': '2025-09-10 02:34:02.022380', 'step': 1233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:02.076990', 'step': 1233, 'epoch': 1} {'type': 'loss', 'content': 0.1717969924211502, 'timestamp': '2025-09-10 02:34:02.079207', 'step': 1234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:02.132156', 'step': 1234, 'epoch': 1} {'type': 'loss', 'content': 0.13182379305362701, 'timestamp': '2025-09-10 02:34:02.134340', 'step': 1235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:02.186785', 'step': 1235, 'epoch': 1} {'type': 'loss', 'content': 0.13636134564876556, 'timestamp': '2025-09-10 02:34:02.192648', 'step': 1236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:02.244965', 'step': 1236, 'epoch': 1} {'type': 'loss', 'content': 0.2100919783115387, 'timestamp': '2025-09-10 02:34:02.247293', 'step': 1237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:34:02.320517', 'step': 1237, 'epoch': 1} {'type': 'loss', 'content': 0.40893641114234924, 'timestamp': '2025-09-10 02:34:02.322817', 'step': 1238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:02.377228', 'step': 1238, 'epoch': 1} {'type': 'loss', 'content': 0.14428572356700897, 'timestamp': '2025-09-10 02:34:02.379427', 'step': 1239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:02.432772', 'step': 1239, 'epoch': 1} {'type': 'loss', 'content': 0.22757887840270996, 'timestamp': '2025-09-10 02:34:02.438392', 'step': 1240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:02.491093', 'step': 1240, 'epoch': 1} {'type': 'loss', 'content': 0.3019968569278717, 'timestamp': '2025-09-10 02:34:02.493113', 'step': 1241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:02.546669', 'step': 1241, 'epoch': 1} {'type': 'loss', 'content': 0.1795724332332611, 'timestamp': '2025-09-10 02:34:02.548892', 'step': 1242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:02.601873', 'step': 1242, 'epoch': 1} {'type': 'loss', 'content': 0.17987306416034698, 'timestamp': '2025-09-10 02:34:02.603780', 'step': 1243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:02.656424', 'step': 1243, 'epoch': 1} {'type': 'loss', 'content': 0.14515641331672668, 'timestamp': '2025-09-10 02:34:02.662197', 'step': 1244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:02.714835', 'step': 1244, 'epoch': 1} {'type': 'loss', 'content': 0.21854500472545624, 'timestamp': '2025-09-10 02:34:02.716664', 'step': 1245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:02.769403', 'step': 1245, 'epoch': 1} {'type': 'loss', 'content': 0.16534551978111267, 'timestamp': '2025-09-10 02:34:02.771486', 'step': 1246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:02.824765', 'step': 1246, 'epoch': 1} {'type': 'loss', 'content': 0.27108675241470337, 'timestamp': '2025-09-10 02:34:02.827107', 'step': 1247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:02.880275', 'step': 1247, 'epoch': 1} {'type': 'loss', 'content': 0.09712989628314972, 'timestamp': '2025-09-10 02:34:02.886229', 'step': 1248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:02.939218', 'step': 1248, 'epoch': 1} {'type': 'loss', 'content': 0.18667569756507874, 'timestamp': '2025-09-10 02:34:02.941231', 'step': 1249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:02.994061', 'step': 1249, 'epoch': 1} {'type': 'loss', 'content': 0.13197113573551178, 'timestamp': '2025-09-10 02:34:02.995933', 'step': 1250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:03.050922', 'step': 1250, 'epoch': 1} {'type': 'loss', 'content': 0.165843665599823, 'timestamp': '2025-09-10 02:34:03.053211', 'step': 1251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:03.106009', 'step': 1251, 'epoch': 1} {'type': 'loss', 'content': 0.1325726956129074, 'timestamp': '2025-09-10 02:34:03.111774', 'step': 1252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:03.164669', 'step': 1252, 'epoch': 1} {'type': 'loss', 'content': 0.1514969766139984, 'timestamp': '2025-09-10 02:34:03.166799', 'step': 1253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:03.220121', 'step': 1253, 'epoch': 1} {'type': 'loss', 'content': 0.15947413444519043, 'timestamp': '2025-09-10 02:34:03.222174', 'step': 1254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:03.275805', 'step': 1254, 'epoch': 1} {'type': 'loss', 'content': 0.16610537469387054, 'timestamp': '2025-09-10 02:34:03.277888', 'step': 1255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:03.331026', 'step': 1255, 'epoch': 1} {'type': 'loss', 'content': 0.1826983392238617, 'timestamp': '2025-09-10 02:34:03.336560', 'step': 1256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:03.389264', 'step': 1256, 'epoch': 1} {'type': 'loss', 'content': 0.17015956342220306, 'timestamp': '2025-09-10 02:34:03.391358', 'step': 1257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:03.444220', 'step': 1257, 'epoch': 1} {'type': 'loss', 'content': 0.21105284988880157, 'timestamp': '2025-09-10 02:34:03.446159', 'step': 1258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:03.500909', 'step': 1258, 'epoch': 1} {'type': 'loss', 'content': 0.21437323093414307, 'timestamp': '2025-09-10 02:34:03.502976', 'step': 1259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:03.556067', 'step': 1259, 'epoch': 1} {'type': 'loss', 'content': 0.2573264241218567, 'timestamp': '2025-09-10 02:34:03.561770', 'step': 1260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:03.614644', 'step': 1260, 'epoch': 1} {'type': 'loss', 'content': 0.21648603677749634, 'timestamp': '2025-09-10 02:34:03.616702', 'step': 1261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:03.669626', 'step': 1261, 'epoch': 1} {'type': 'loss', 'content': 0.24237680435180664, 'timestamp': '2025-09-10 02:34:03.671804', 'step': 1262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:03.724948', 'step': 1262, 'epoch': 1} {'type': 'loss', 'content': 0.18573834002017975, 'timestamp': '2025-09-10 02:34:03.727007', 'step': 1263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:03.779866', 'step': 1263, 'epoch': 1} {'type': 'loss', 'content': 0.1556423008441925, 'timestamp': '2025-09-10 02:34:03.785818', 'step': 1264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:03.837869', 'step': 1264, 'epoch': 1} {'type': 'loss', 'content': 0.28624698519706726, 'timestamp': '2025-09-10 02:34:03.839936', 'step': 1265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:03.893574', 'step': 1265, 'epoch': 1} {'type': 'loss', 'content': 0.19134455919265747, 'timestamp': '2025-09-10 02:34:03.895626', 'step': 1266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:03.950535', 'step': 1266, 'epoch': 1} {'type': 'loss', 'content': 0.2520471513271332, 'timestamp': '2025-09-10 02:34:03.952578', 'step': 1267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:04.005898', 'step': 1267, 'epoch': 1} {'type': 'loss', 'content': 0.062166888266801834, 'timestamp': '2025-09-10 02:34:04.011464', 'step': 1268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:04.063503', 'step': 1268, 'epoch': 1} {'type': 'loss', 'content': 0.1684144139289856, 'timestamp': '2025-09-10 02:34:04.065581', 'step': 1269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:04.121093', 'step': 1269, 'epoch': 1} {'type': 'loss', 'content': 0.3624727427959442, 'timestamp': '2025-09-10 02:34:04.123030', 'step': 1270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:04.177504', 'step': 1270, 'epoch': 1} {'type': 'loss', 'content': 0.2299869805574417, 'timestamp': '2025-09-10 02:34:04.179564', 'step': 1271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:04.232274', 'step': 1271, 'epoch': 1} {'type': 'loss', 'content': 0.1938493549823761, 'timestamp': '2025-09-10 02:34:04.238149', 'step': 1272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:04.290883', 'step': 1272, 'epoch': 1} {'type': 'loss', 'content': 0.19889286160469055, 'timestamp': '2025-09-10 02:34:04.292926', 'step': 1273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:04.345988', 'step': 1273, 'epoch': 1} {'type': 'loss', 'content': 0.12178637087345123, 'timestamp': '2025-09-10 02:34:04.348070', 'step': 1274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:04.401063', 'step': 1274, 'epoch': 1} {'type': 'loss', 'content': 0.11680298298597336, 'timestamp': '2025-09-10 02:34:04.402946', 'step': 1275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:04.456021', 'step': 1275, 'epoch': 1} {'type': 'loss', 'content': 0.20347748696804047, 'timestamp': '2025-09-10 02:34:04.461945', 'step': 1276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:04.514843', 'step': 1276, 'epoch': 1} {'type': 'loss', 'content': 0.2826610505580902, 'timestamp': '2025-09-10 02:34:04.516874', 'step': 1277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:04.571678', 'step': 1277, 'epoch': 1} {'type': 'loss', 'content': 0.16738224029541016, 'timestamp': '2025-09-10 02:34:04.573824', 'step': 1278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:04.627323', 'step': 1278, 'epoch': 1} {'type': 'loss', 'content': 0.15574431419372559, 'timestamp': '2025-09-10 02:34:04.629494', 'step': 1279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:04.682683', 'step': 1279, 'epoch': 1} {'type': 'loss', 'content': 0.2042314112186432, 'timestamp': '2025-09-10 02:34:04.688435', 'step': 1280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:04.740924', 'step': 1280, 'epoch': 1} {'type': 'loss', 'content': 0.15884405374526978, 'timestamp': '2025-09-10 02:34:04.742774', 'step': 1281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:04.795667', 'step': 1281, 'epoch': 1} {'type': 'loss', 'content': 0.13826656341552734, 'timestamp': '2025-09-10 02:34:04.797747', 'step': 1282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:04.850088', 'step': 1282, 'epoch': 1} {'type': 'loss', 'content': 0.20716485381126404, 'timestamp': '2025-09-10 02:34:04.852140', 'step': 1283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:04.905313', 'step': 1283, 'epoch': 1} {'type': 'loss', 'content': 0.15984521806240082, 'timestamp': '2025-09-10 02:34:04.911000', 'step': 1284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:04.963144', 'step': 1284, 'epoch': 1} {'type': 'loss', 'content': 0.19375793635845184, 'timestamp': '2025-09-10 02:34:04.964930', 'step': 1285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:05.018881', 'step': 1285, 'epoch': 1} {'type': 'loss', 'content': 0.22714532911777496, 'timestamp': '2025-09-10 02:34:05.020909', 'step': 1286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:05.076555', 'step': 1286, 'epoch': 1} {'type': 'loss', 'content': 0.2605617642402649, 'timestamp': '2025-09-10 02:34:05.078720', 'step': 1287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:05.131709', 'step': 1287, 'epoch': 1} {'type': 'loss', 'content': 0.1417497992515564, 'timestamp': '2025-09-10 02:34:05.137451', 'step': 1288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:05.196048', 'step': 1288, 'epoch': 1} {'type': 'loss', 'content': 0.15796349942684174, 'timestamp': '2025-09-10 02:34:05.197994', 'step': 1289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:05.251981', 'step': 1289, 'epoch': 1} {'type': 'loss', 'content': 0.18637311458587646, 'timestamp': '2025-09-10 02:34:05.254107', 'step': 1290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:05.307719', 'step': 1290, 'epoch': 1} {'type': 'loss', 'content': 0.20591367781162262, 'timestamp': '2025-09-10 02:34:05.309939', 'step': 1291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:05.363049', 'step': 1291, 'epoch': 1} {'type': 'loss', 'content': 0.18548716604709625, 'timestamp': '2025-09-10 02:34:05.368805', 'step': 1292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:05.423822', 'step': 1292, 'epoch': 1} {'type': 'loss', 'content': 0.1576239913702011, 'timestamp': '2025-09-10 02:34:05.425876', 'step': 1293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:05.480366', 'step': 1293, 'epoch': 1} {'type': 'loss', 'content': 0.15129975974559784, 'timestamp': '2025-09-10 02:34:05.482418', 'step': 1294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:34:05.538109', 'step': 1294, 'epoch': 1} {'type': 'loss', 'content': 0.22243322432041168, 'timestamp': '2025-09-10 02:34:05.540002', 'step': 1295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:05.596795', 'step': 1295, 'epoch': 1} {'type': 'loss', 'content': 0.2163696140050888, 'timestamp': '2025-09-10 02:34:05.603526', 'step': 1296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:05.661861', 'step': 1296, 'epoch': 1} {'type': 'loss', 'content': 0.1522102952003479, 'timestamp': '2025-09-10 02:34:05.663973', 'step': 1297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:05.720583', 'step': 1297, 'epoch': 1} {'type': 'loss', 'content': 0.181023508310318, 'timestamp': '2025-09-10 02:34:05.722618', 'step': 1298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:05.778004', 'step': 1298, 'epoch': 1} {'type': 'loss', 'content': 0.17959220707416534, 'timestamp': '2025-09-10 02:34:05.780204', 'step': 1299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:05.835349', 'step': 1299, 'epoch': 1} {'type': 'loss', 'content': 0.16661512851715088, 'timestamp': '2025-09-10 02:34:05.841550', 'step': 1300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:05.894884', 'step': 1300, 'epoch': 1} {'type': 'loss', 'content': 0.260842502117157, 'timestamp': '2025-09-10 02:34:05.896950', 'step': 1301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:05.950096', 'step': 1301, 'epoch': 1} {'type': 'loss', 'content': 0.20525673031806946, 'timestamp': '2025-09-10 02:34:05.951934', 'step': 1302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:06.004700', 'step': 1302, 'epoch': 1} {'type': 'loss', 'content': 0.25822913646698, 'timestamp': '2025-09-10 02:34:06.006762', 'step': 1303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:06.059630', 'step': 1303, 'epoch': 1} {'type': 'loss', 'content': 0.20072388648986816, 'timestamp': '2025-09-10 02:34:06.065285', 'step': 1304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:06.118095', 'step': 1304, 'epoch': 1} {'type': 'loss', 'content': 0.17988234758377075, 'timestamp': '2025-09-10 02:34:06.120409', 'step': 1305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:06.174195', 'step': 1305, 'epoch': 1} {'type': 'loss', 'content': 0.3355196416378021, 'timestamp': '2025-09-10 02:34:06.176441', 'step': 1306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:06.233766', 'step': 1306, 'epoch': 1} {'type': 'loss', 'content': 0.16268832981586456, 'timestamp': '2025-09-10 02:34:06.235960', 'step': 1307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:06.290991', 'step': 1307, 'epoch': 1} {'type': 'loss', 'content': 0.25736379623413086, 'timestamp': '2025-09-10 02:34:06.297415', 'step': 1308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:06.351544', 'step': 1308, 'epoch': 1} {'type': 'loss', 'content': 0.15878544747829437, 'timestamp': '2025-09-10 02:34:06.353588', 'step': 1309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:06.408919', 'step': 1309, 'epoch': 1} {'type': 'loss', 'content': 0.10118860006332397, 'timestamp': '2025-09-10 02:34:06.410977', 'step': 1310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:06.466572', 'step': 1310, 'epoch': 1} {'type': 'loss', 'content': 0.15160472691059113, 'timestamp': '2025-09-10 02:34:06.468614', 'step': 1311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:06.525717', 'step': 1311, 'epoch': 1} {'type': 'loss', 'content': 0.1540622115135193, 'timestamp': '2025-09-10 02:34:06.531907', 'step': 1312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:06.586230', 'step': 1312, 'epoch': 1} {'type': 'loss', 'content': 0.20704007148742676, 'timestamp': '2025-09-10 02:34:06.588317', 'step': 1313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:06.642447', 'step': 1313, 'epoch': 1} {'type': 'loss', 'content': 0.17104552686214447, 'timestamp': '2025-09-10 02:34:06.644570', 'step': 1314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:06.697460', 'step': 1314, 'epoch': 1} {'type': 'loss', 'content': 0.21579432487487793, 'timestamp': '2025-09-10 02:34:06.699452', 'step': 1315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:06.752482', 'step': 1315, 'epoch': 1} {'type': 'loss', 'content': 0.23544257879257202, 'timestamp': '2025-09-10 02:34:06.758552', 'step': 1316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:06.811513', 'step': 1316, 'epoch': 1} {'type': 'loss', 'content': 0.1556471288204193, 'timestamp': '2025-09-10 02:34:06.813352', 'step': 1317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:06.865952', 'step': 1317, 'epoch': 1} {'type': 'loss', 'content': 0.1424562633037567, 'timestamp': '2025-09-10 02:34:06.867910', 'step': 1318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:06.920960', 'step': 1318, 'epoch': 1} {'type': 'loss', 'content': 0.18573524057865143, 'timestamp': '2025-09-10 02:34:06.922829', 'step': 1319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:06.976248', 'step': 1319, 'epoch': 1} {'type': 'loss', 'content': 0.10015080869197845, 'timestamp': '2025-09-10 02:34:06.982268', 'step': 1320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:07.034178', 'step': 1320, 'epoch': 1} {'type': 'loss', 'content': 0.2046733945608139, 'timestamp': '2025-09-10 02:34:07.036361', 'step': 1321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:34:07.089937', 'step': 1321, 'epoch': 1} {'type': 'loss', 'content': 0.12055535614490509, 'timestamp': '2025-09-10 02:34:07.091879', 'step': 1322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:07.146542', 'step': 1322, 'epoch': 1} {'type': 'loss', 'content': 0.1180391013622284, 'timestamp': '2025-09-10 02:34:07.148491', 'step': 1323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:07.200962', 'step': 1323, 'epoch': 1} {'type': 'loss', 'content': 0.1406814157962799, 'timestamp': '2025-09-10 02:34:07.206535', 'step': 1324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:07.258402', 'step': 1324, 'epoch': 1} {'type': 'loss', 'content': 0.2955727279186249, 'timestamp': '2025-09-10 02:34:07.260475', 'step': 1325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:07.313821', 'step': 1325, 'epoch': 1} {'type': 'loss', 'content': 0.10089199990034103, 'timestamp': '2025-09-10 02:34:07.315603', 'step': 1326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:07.368322', 'step': 1326, 'epoch': 1} {'type': 'loss', 'content': 0.17526499927043915, 'timestamp': '2025-09-10 02:34:07.370388', 'step': 1327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:07.423449', 'step': 1327, 'epoch': 1} {'type': 'loss', 'content': 0.12390592694282532, 'timestamp': '2025-09-10 02:34:07.429096', 'step': 1328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:07.481621', 'step': 1328, 'epoch': 1} {'type': 'loss', 'content': 0.14079667627811432, 'timestamp': '2025-09-10 02:34:07.483643', 'step': 1329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:07.535953', 'step': 1329, 'epoch': 1} {'type': 'loss', 'content': 0.12305444478988647, 'timestamp': '2025-09-10 02:34:07.537842', 'step': 1330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:07.591276', 'step': 1330, 'epoch': 1} {'type': 'loss', 'content': 0.16188837587833405, 'timestamp': '2025-09-10 02:34:07.593107', 'step': 1331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:07.645765', 'step': 1331, 'epoch': 1} {'type': 'loss', 'content': 0.2525204122066498, 'timestamp': '2025-09-10 02:34:07.651363', 'step': 1332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:07.704505', 'step': 1332, 'epoch': 1} {'type': 'loss', 'content': 0.1837308704853058, 'timestamp': '2025-09-10 02:34:07.706524', 'step': 1333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:34:07.758959', 'step': 1333, 'epoch': 1} {'type': 'loss', 'content': 0.26107868552207947, 'timestamp': '2025-09-10 02:34:07.761107', 'step': 1334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:07.814703', 'step': 1334, 'epoch': 1} {'type': 'loss', 'content': 0.19569997489452362, 'timestamp': '2025-09-10 02:34:07.816877', 'step': 1335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:07.870367', 'step': 1335, 'epoch': 1} {'type': 'loss', 'content': 0.1747598499059677, 'timestamp': '2025-09-10 02:34:07.876198', 'step': 1336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:07.928566', 'step': 1336, 'epoch': 1} {'type': 'loss', 'content': 0.1025848239660263, 'timestamp': '2025-09-10 02:34:07.930455', 'step': 1337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:07.983407', 'step': 1337, 'epoch': 1} {'type': 'loss', 'content': 0.1183788850903511, 'timestamp': '2025-09-10 02:34:07.985418', 'step': 1338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:08.037796', 'step': 1338, 'epoch': 1} {'type': 'loss', 'content': 0.15694697201251984, 'timestamp': '2025-09-10 02:34:08.039901', 'step': 1339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:08.092622', 'step': 1339, 'epoch': 1} {'type': 'loss', 'content': 0.23560459911823273, 'timestamp': '2025-09-10 02:34:08.098431', 'step': 1340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:08.151561', 'step': 1340, 'epoch': 1} {'type': 'loss', 'content': 0.19737552106380463, 'timestamp': '2025-09-10 02:34:08.153461', 'step': 1341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:08.206080', 'step': 1341, 'epoch': 1} {'type': 'loss', 'content': 0.21500259637832642, 'timestamp': '2025-09-10 02:34:08.208092', 'step': 1342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:08.260526', 'step': 1342, 'epoch': 1} {'type': 'loss', 'content': 0.14402155578136444, 'timestamp': '2025-09-10 02:34:08.262355', 'step': 1343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:08.315243', 'step': 1343, 'epoch': 1} {'type': 'loss', 'content': 0.12346089631319046, 'timestamp': '2025-09-10 02:34:08.321017', 'step': 1344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:08.373019', 'step': 1344, 'epoch': 1} {'type': 'loss', 'content': 0.29235222935676575, 'timestamp': '2025-09-10 02:34:08.375091', 'step': 1345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:08.428536', 'step': 1345, 'epoch': 1} {'type': 'loss', 'content': 0.19911180436611176, 'timestamp': '2025-09-10 02:34:08.430628', 'step': 1346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:08.483830', 'step': 1346, 'epoch': 1} {'type': 'loss', 'content': 0.11671437323093414, 'timestamp': '2025-09-10 02:34:08.485910', 'step': 1347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:08.539258', 'step': 1347, 'epoch': 1} {'type': 'loss', 'content': 0.2229379564523697, 'timestamp': '2025-09-10 02:34:08.545112', 'step': 1348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:08.598194', 'step': 1348, 'epoch': 1} {'type': 'loss', 'content': 0.2166563868522644, 'timestamp': '2025-09-10 02:34:08.600484', 'step': 1349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:08.653407', 'step': 1349, 'epoch': 1} {'type': 'loss', 'content': 0.18576224148273468, 'timestamp': '2025-09-10 02:34:08.655513', 'step': 1350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:08.708252', 'step': 1350, 'epoch': 1} {'type': 'loss', 'content': 0.1275589019060135, 'timestamp': '2025-09-10 02:34:08.710350', 'step': 1351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:08.763734', 'step': 1351, 'epoch': 1} {'type': 'loss', 'content': 0.1581021249294281, 'timestamp': '2025-09-10 02:34:08.769507', 'step': 1352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:08.821956', 'step': 1352, 'epoch': 1} {'type': 'loss', 'content': 0.2200571745634079, 'timestamp': '2025-09-10 02:34:08.823748', 'step': 1353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:08.876464', 'step': 1353, 'epoch': 1} {'type': 'loss', 'content': 0.14903677999973297, 'timestamp': '2025-09-10 02:34:08.878620', 'step': 1354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:08.931720', 'step': 1354, 'epoch': 1} {'type': 'loss', 'content': 0.27774176001548767, 'timestamp': '2025-09-10 02:34:08.933802', 'step': 1355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:08.986920', 'step': 1355, 'epoch': 1} {'type': 'loss', 'content': 0.13733021914958954, 'timestamp': '2025-09-10 02:34:08.992824', 'step': 1356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:09.045198', 'step': 1356, 'epoch': 1} {'type': 'loss', 'content': 0.20618915557861328, 'timestamp': '2025-09-10 02:34:09.047411', 'step': 1357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:09.101022', 'step': 1357, 'epoch': 1} {'type': 'loss', 'content': 0.2909681499004364, 'timestamp': '2025-09-10 02:34:09.103077', 'step': 1358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:09.156637', 'step': 1358, 'epoch': 1} {'type': 'loss', 'content': 0.2387406826019287, 'timestamp': '2025-09-10 02:34:09.158599', 'step': 1359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:09.211826', 'step': 1359, 'epoch': 1} {'type': 'loss', 'content': 0.2030820995569229, 'timestamp': '2025-09-10 02:34:09.217522', 'step': 1360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:09.270010', 'step': 1360, 'epoch': 1} {'type': 'loss', 'content': 0.21452657878398895, 'timestamp': '2025-09-10 02:34:09.272107', 'step': 1361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:09.325395', 'step': 1361, 'epoch': 1} {'type': 'loss', 'content': 0.10299279540777206, 'timestamp': '2025-09-10 02:34:09.327418', 'step': 1362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:09.380430', 'step': 1362, 'epoch': 1} {'type': 'loss', 'content': 0.13016515970230103, 'timestamp': '2025-09-10 02:34:09.382785', 'step': 1363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:09.436937', 'step': 1363, 'epoch': 1} {'type': 'loss', 'content': 0.21408504247665405, 'timestamp': '2025-09-10 02:34:09.443001', 'step': 1364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:09.497839', 'step': 1364, 'epoch': 1} {'type': 'loss', 'content': 0.08896540850400925, 'timestamp': '2025-09-10 02:34:09.499707', 'step': 1365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:09.553563', 'step': 1365, 'epoch': 1} {'type': 'loss', 'content': 0.14448754489421844, 'timestamp': '2025-09-10 02:34:09.555766', 'step': 1366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:09.611731', 'step': 1366, 'epoch': 1} {'type': 'loss', 'content': 0.20899970829486847, 'timestamp': '2025-09-10 02:34:09.613899', 'step': 1367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:09.666859', 'step': 1367, 'epoch': 1} {'type': 'loss', 'content': 0.08920329809188843, 'timestamp': '2025-09-10 02:34:09.672827', 'step': 1368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:09.725620', 'step': 1368, 'epoch': 1} {'type': 'loss', 'content': 0.21093204617500305, 'timestamp': '2025-09-10 02:34:09.727645', 'step': 1369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:09.780323', 'step': 1369, 'epoch': 1} {'type': 'loss', 'content': 0.18845731019973755, 'timestamp': '2025-09-10 02:34:09.782376', 'step': 1370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:09.839213', 'step': 1370, 'epoch': 1} {'type': 'loss', 'content': 0.21204933524131775, 'timestamp': '2025-09-10 02:34:09.841412', 'step': 1371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:09.894478', 'step': 1371, 'epoch': 1} {'type': 'loss', 'content': 0.1236763671040535, 'timestamp': '2025-09-10 02:34:09.900457', 'step': 1372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:09.953336', 'step': 1372, 'epoch': 1} {'type': 'loss', 'content': 0.07680883258581161, 'timestamp': '2025-09-10 02:34:09.955386', 'step': 1373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:10.008886', 'step': 1373, 'epoch': 1} {'type': 'loss', 'content': 0.22882091999053955, 'timestamp': '2025-09-10 02:34:10.010884', 'step': 1374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:10.069420', 'step': 1374, 'epoch': 1} {'type': 'loss', 'content': 0.14643654227256775, 'timestamp': '2025-09-10 02:34:10.071528', 'step': 1375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:10.124389', 'step': 1375, 'epoch': 1} {'type': 'loss', 'content': 0.2624644935131073, 'timestamp': '2025-09-10 02:34:10.130353', 'step': 1376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:10.184934', 'step': 1376, 'epoch': 1} {'type': 'loss', 'content': 0.16499656438827515, 'timestamp': '2025-09-10 02:34:10.187179', 'step': 1377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:10.240291', 'step': 1377, 'epoch': 1} {'type': 'loss', 'content': 0.21662084758281708, 'timestamp': '2025-09-10 02:34:10.242612', 'step': 1378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:10.295590', 'step': 1378, 'epoch': 1} {'type': 'loss', 'content': 0.14653778076171875, 'timestamp': '2025-09-10 02:34:10.297588', 'step': 1379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:10.350108', 'step': 1379, 'epoch': 1} {'type': 'loss', 'content': 0.15353354811668396, 'timestamp': '2025-09-10 02:34:10.356024', 'step': 1380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:10.408843', 'step': 1380, 'epoch': 1} {'type': 'loss', 'content': 0.17511865496635437, 'timestamp': '2025-09-10 02:34:10.410975', 'step': 1381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:10.471614', 'step': 1381, 'epoch': 1} {'type': 'loss', 'content': 0.14279283583164215, 'timestamp': '2025-09-10 02:34:10.473722', 'step': 1382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:10.526722', 'step': 1382, 'epoch': 1} {'type': 'loss', 'content': 0.22450818121433258, 'timestamp': '2025-09-10 02:34:10.528956', 'step': 1383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:10.582632', 'step': 1383, 'epoch': 1} {'type': 'loss', 'content': 0.1653411090373993, 'timestamp': '2025-09-10 02:34:10.588550', 'step': 1384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:10.641103', 'step': 1384, 'epoch': 1} {'type': 'loss', 'content': 0.22348840534687042, 'timestamp': '2025-09-10 02:34:10.643115', 'step': 1385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:10.695977', 'step': 1385, 'epoch': 1} {'type': 'loss', 'content': 0.1815728396177292, 'timestamp': '2025-09-10 02:34:10.698141', 'step': 1386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:10.751322', 'step': 1386, 'epoch': 1} {'type': 'loss', 'content': 0.16079264879226685, 'timestamp': '2025-09-10 02:34:10.753314', 'step': 1387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:10.805751', 'step': 1387, 'epoch': 1} {'type': 'loss', 'content': 0.16628871858119965, 'timestamp': '2025-09-10 02:34:10.811488', 'step': 1388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:10.863820', 'step': 1388, 'epoch': 1} {'type': 'loss', 'content': 0.20149804651737213, 'timestamp': '2025-09-10 02:34:10.865935', 'step': 1389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:10.918824', 'step': 1389, 'epoch': 1} {'type': 'loss', 'content': 0.13775025308132172, 'timestamp': '2025-09-10 02:34:10.920870', 'step': 1390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:10.973658', 'step': 1390, 'epoch': 1} {'type': 'loss', 'content': 0.18655626475811005, 'timestamp': '2025-09-10 02:34:10.975686', 'step': 1391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:34:11.029288', 'step': 1391, 'epoch': 1} {'type': 'loss', 'content': 0.07906336337327957, 'timestamp': '2025-09-10 02:34:11.035225', 'step': 1392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:11.089615', 'step': 1392, 'epoch': 1} {'type': 'loss', 'content': 0.2296856939792633, 'timestamp': '2025-09-10 02:34:11.091741', 'step': 1393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:11.145539', 'step': 1393, 'epoch': 1} {'type': 'loss', 'content': 0.17312081158161163, 'timestamp': '2025-09-10 02:34:11.147393', 'step': 1394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:11.200806', 'step': 1394, 'epoch': 1} {'type': 'loss', 'content': 0.1253776103258133, 'timestamp': '2025-09-10 02:34:11.202995', 'step': 1395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:11.255903', 'step': 1395, 'epoch': 1} {'type': 'loss', 'content': 0.247130885720253, 'timestamp': '2025-09-10 02:34:11.261741', 'step': 1396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:11.313934', 'step': 1396, 'epoch': 1} {'type': 'loss', 'content': 0.2040875405073166, 'timestamp': '2025-09-10 02:34:11.315983', 'step': 1397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:11.368982', 'step': 1397, 'epoch': 1} {'type': 'loss', 'content': 0.2203451544046402, 'timestamp': '2025-09-10 02:34:11.371086', 'step': 1398, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:34:23.885557', 'step': 1398, 'epoch': 1} {'type': 'pplx', 'content': 10745.852287194624, 'timestamp': '2025-09-10 02:34:23.888845', 'step': 1398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:23.945433', 'step': 1398, 'epoch': 1} {'type': 'loss', 'content': 0.24628514051437378, 'timestamp': '2025-09-10 02:34:23.947137', 'step': 1399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:24.002351', 'step': 1399, 'epoch': 1} {'type': 'loss', 'content': 0.14638040959835052, 'timestamp': '2025-09-10 02:34:24.009554', 'step': 1400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:24.067469', 'step': 1400, 'epoch': 1} {'type': 'loss', 'content': 0.21406112611293793, 'timestamp': '2025-09-10 02:34:24.069655', 'step': 1401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:24.124274', 'step': 1401, 'epoch': 1} {'type': 'loss', 'content': 0.1990862339735031, 'timestamp': '2025-09-10 02:34:24.126584', 'step': 1402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:34:24.180564', 'step': 1402, 'epoch': 1} {'type': 'loss', 'content': 0.17336507141590118, 'timestamp': '2025-09-10 02:34:24.182531', 'step': 1403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:24.236521', 'step': 1403, 'epoch': 1} {'type': 'loss', 'content': 0.18454763293266296, 'timestamp': '2025-09-10 02:34:24.242663', 'step': 1404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:24.296953', 'step': 1404, 'epoch': 1} {'type': 'loss', 'content': 0.17177270352840424, 'timestamp': '2025-09-10 02:34:24.298954', 'step': 1405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:24.353721', 'step': 1405, 'epoch': 1} {'type': 'loss', 'content': 0.1585163176059723, 'timestamp': '2025-09-10 02:34:24.355676', 'step': 1406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:24.408736', 'step': 1406, 'epoch': 1} {'type': 'loss', 'content': 0.14341840147972107, 'timestamp': '2025-09-10 02:34:24.410855', 'step': 1407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:24.465309', 'step': 1407, 'epoch': 1} {'type': 'loss', 'content': 0.2724792957305908, 'timestamp': '2025-09-10 02:34:24.471593', 'step': 1408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:24.526740', 'step': 1408, 'epoch': 1} {'type': 'loss', 'content': 0.15390342473983765, 'timestamp': '2025-09-10 02:34:24.528803', 'step': 1409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:24.584409', 'step': 1409, 'epoch': 1} {'type': 'loss', 'content': 0.18034778535366058, 'timestamp': '2025-09-10 02:34:24.586468', 'step': 1410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:24.644889', 'step': 1410, 'epoch': 1} {'type': 'loss', 'content': 0.1770976185798645, 'timestamp': '2025-09-10 02:34:24.646922', 'step': 1411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:24.703616', 'step': 1411, 'epoch': 1} {'type': 'loss', 'content': 0.12064293026924133, 'timestamp': '2025-09-10 02:34:24.710378', 'step': 1412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:24.766793', 'step': 1412, 'epoch': 1} {'type': 'loss', 'content': 0.22083263099193573, 'timestamp': '2025-09-10 02:34:24.768865', 'step': 1413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:24.828790', 'step': 1413, 'epoch': 1} {'type': 'loss', 'content': 0.2013239711523056, 'timestamp': '2025-09-10 02:34:24.830860', 'step': 1414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:24.887813', 'step': 1414, 'epoch': 1} {'type': 'loss', 'content': 0.15457399189472198, 'timestamp': '2025-09-10 02:34:24.890059', 'step': 1415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:24.946399', 'step': 1415, 'epoch': 1} {'type': 'loss', 'content': 0.11217954754829407, 'timestamp': '2025-09-10 02:34:24.953725', 'step': 1416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:25.011855', 'step': 1416, 'epoch': 1} {'type': 'loss', 'content': 0.2804694175720215, 'timestamp': '2025-09-10 02:34:25.014017', 'step': 1417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:25.073905', 'step': 1417, 'epoch': 1} {'type': 'loss', 'content': 0.18654364347457886, 'timestamp': '2025-09-10 02:34:25.076223', 'step': 1418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:25.136619', 'step': 1418, 'epoch': 1} {'type': 'loss', 'content': 0.23234818875789642, 'timestamp': '2025-09-10 02:34:25.138714', 'step': 1419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:34:25.196233', 'step': 1419, 'epoch': 1} {'type': 'loss', 'content': 0.27092215418815613, 'timestamp': '2025-09-10 02:34:25.202876', 'step': 1420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:25.262122', 'step': 1420, 'epoch': 1} {'type': 'loss', 'content': 0.15393465757369995, 'timestamp': '2025-09-10 02:34:25.264258', 'step': 1421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:25.328471', 'step': 1421, 'epoch': 1} {'type': 'loss', 'content': 0.18075141310691833, 'timestamp': '2025-09-10 02:34:25.330504', 'step': 1422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:25.387458', 'step': 1422, 'epoch': 1} {'type': 'loss', 'content': 0.13621735572814941, 'timestamp': '2025-09-10 02:34:25.389570', 'step': 1423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:25.443747', 'step': 1423, 'epoch': 1} {'type': 'loss', 'content': 0.23261405527591705, 'timestamp': '2025-09-10 02:34:25.449943', 'step': 1424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:25.503130', 'step': 1424, 'epoch': 1} {'type': 'loss', 'content': 0.1521192193031311, 'timestamp': '2025-09-10 02:34:25.505192', 'step': 1425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:25.558585', 'step': 1425, 'epoch': 1} {'type': 'loss', 'content': 0.27773234248161316, 'timestamp': '2025-09-10 02:34:25.560658', 'step': 1426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:25.615880', 'step': 1426, 'epoch': 1} {'type': 'loss', 'content': 0.19271723926067352, 'timestamp': '2025-09-10 02:34:25.617865', 'step': 1427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:25.673514', 'step': 1427, 'epoch': 1} {'type': 'loss', 'content': 0.17914851009845734, 'timestamp': '2025-09-10 02:34:25.679641', 'step': 1428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:25.733315', 'step': 1428, 'epoch': 1} {'type': 'loss', 'content': 0.16771826148033142, 'timestamp': '2025-09-10 02:34:25.735364', 'step': 1429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:25.790908', 'step': 1429, 'epoch': 1} {'type': 'loss', 'content': 0.19357378780841827, 'timestamp': '2025-09-10 02:34:25.793100', 'step': 1430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:25.848183', 'step': 1430, 'epoch': 1} {'type': 'loss', 'content': 0.2609681487083435, 'timestamp': '2025-09-10 02:34:25.850455', 'step': 1431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:34:25.903802', 'step': 1431, 'epoch': 1} {'type': 'loss', 'content': 0.21174775063991547, 'timestamp': '2025-09-10 02:34:25.909990', 'step': 1432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:25.963852', 'step': 1432, 'epoch': 1} {'type': 'loss', 'content': 0.1436004787683487, 'timestamp': '2025-09-10 02:34:25.965973', 'step': 1433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:26.020375', 'step': 1433, 'epoch': 1} {'type': 'loss', 'content': 0.28683900833129883, 'timestamp': '2025-09-10 02:34:26.022444', 'step': 1434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:26.075768', 'step': 1434, 'epoch': 1} {'type': 'loss', 'content': 0.13177309930324554, 'timestamp': '2025-09-10 02:34:26.077747', 'step': 1435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:26.131431', 'step': 1435, 'epoch': 1} {'type': 'loss', 'content': 0.15788665413856506, 'timestamp': '2025-09-10 02:34:26.137573', 'step': 1436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:26.190770', 'step': 1436, 'epoch': 1} {'type': 'loss', 'content': 0.235310897231102, 'timestamp': '2025-09-10 02:34:26.192832', 'step': 1437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:26.247015', 'step': 1437, 'epoch': 1} {'type': 'loss', 'content': 0.17377062141895294, 'timestamp': '2025-09-10 02:34:26.248884', 'step': 1438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:26.303408', 'step': 1438, 'epoch': 1} {'type': 'loss', 'content': 0.20764757692813873, 'timestamp': '2025-09-10 02:34:26.305424', 'step': 1439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:26.359908', 'step': 1439, 'epoch': 1} {'type': 'loss', 'content': 0.2095273733139038, 'timestamp': '2025-09-10 02:34:26.366082', 'step': 1440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:26.419265', 'step': 1440, 'epoch': 1} {'type': 'loss', 'content': 0.15036961436271667, 'timestamp': '2025-09-10 02:34:26.421342', 'step': 1441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:26.475448', 'step': 1441, 'epoch': 1} {'type': 'loss', 'content': 0.2455081343650818, 'timestamp': '2025-09-10 02:34:26.477703', 'step': 1442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:26.533609', 'step': 1442, 'epoch': 1} {'type': 'loss', 'content': 0.18667346239089966, 'timestamp': '2025-09-10 02:34:26.535751', 'step': 1443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:26.590440', 'step': 1443, 'epoch': 1} {'type': 'loss', 'content': 0.26025819778442383, 'timestamp': '2025-09-10 02:34:26.597113', 'step': 1444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:26.653003', 'step': 1444, 'epoch': 1} {'type': 'loss', 'content': 0.3306911587715149, 'timestamp': '2025-09-10 02:34:26.655299', 'step': 1445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:26.709014', 'step': 1445, 'epoch': 1} {'type': 'loss', 'content': 0.12933696806430817, 'timestamp': '2025-09-10 02:34:26.711108', 'step': 1446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:26.766171', 'step': 1446, 'epoch': 1} {'type': 'loss', 'content': 0.17061719298362732, 'timestamp': '2025-09-10 02:34:26.768275', 'step': 1447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:26.823860', 'step': 1447, 'epoch': 1} {'type': 'loss', 'content': 0.24891576170921326, 'timestamp': '2025-09-10 02:34:26.830484', 'step': 1448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:26.884422', 'step': 1448, 'epoch': 1} {'type': 'loss', 'content': 0.16682155430316925, 'timestamp': '2025-09-10 02:34:26.886465', 'step': 1449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:26.940574', 'step': 1449, 'epoch': 1} {'type': 'loss', 'content': 0.20411865413188934, 'timestamp': '2025-09-10 02:34:26.942597', 'step': 1450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:26.997286', 'step': 1450, 'epoch': 1} {'type': 'loss', 'content': 0.24485357105731964, 'timestamp': '2025-09-10 02:34:26.999236', 'step': 1451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:27.054096', 'step': 1451, 'epoch': 1} {'type': 'loss', 'content': 0.16897207498550415, 'timestamp': '2025-09-10 02:34:27.060148', 'step': 1452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:27.113730', 'step': 1452, 'epoch': 1} {'type': 'loss', 'content': 0.15588843822479248, 'timestamp': '2025-09-10 02:34:27.115789', 'step': 1453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:27.169844', 'step': 1453, 'epoch': 1} {'type': 'loss', 'content': 0.10282952338457108, 'timestamp': '2025-09-10 02:34:27.171940', 'step': 1454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:27.226883', 'step': 1454, 'epoch': 1} {'type': 'loss', 'content': 0.25165414810180664, 'timestamp': '2025-09-10 02:34:27.228891', 'step': 1455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:27.282051', 'step': 1455, 'epoch': 1} {'type': 'loss', 'content': 0.11646559089422226, 'timestamp': '2025-09-10 02:34:27.288368', 'step': 1456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:27.344833', 'step': 1456, 'epoch': 1} {'type': 'loss', 'content': 0.17705486714839935, 'timestamp': '2025-09-10 02:34:27.346800', 'step': 1457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:27.402121', 'step': 1457, 'epoch': 1} {'type': 'loss', 'content': 0.14934566617012024, 'timestamp': '2025-09-10 02:34:27.404096', 'step': 1458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:27.459460', 'step': 1458, 'epoch': 1} {'type': 'loss', 'content': 0.1789768785238266, 'timestamp': '2025-09-10 02:34:27.461637', 'step': 1459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:27.515517', 'step': 1459, 'epoch': 1} {'type': 'loss', 'content': 0.13343971967697144, 'timestamp': '2025-09-10 02:34:27.521835', 'step': 1460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:27.576214', 'step': 1460, 'epoch': 1} {'type': 'loss', 'content': 0.2249745875597, 'timestamp': '2025-09-10 02:34:27.578259', 'step': 1461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:27.633192', 'step': 1461, 'epoch': 1} {'type': 'loss', 'content': 0.19961988925933838, 'timestamp': '2025-09-10 02:34:27.635326', 'step': 1462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:27.688732', 'step': 1462, 'epoch': 1} {'type': 'loss', 'content': 0.2818447947502136, 'timestamp': '2025-09-10 02:34:27.690757', 'step': 1463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:27.744761', 'step': 1463, 'epoch': 1} {'type': 'loss', 'content': 0.2422439008951187, 'timestamp': '2025-09-10 02:34:27.750959', 'step': 1464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:27.804125', 'step': 1464, 'epoch': 1} {'type': 'loss', 'content': 0.22489845752716064, 'timestamp': '2025-09-10 02:34:27.806257', 'step': 1465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:27.861345', 'step': 1465, 'epoch': 1} {'type': 'loss', 'content': 0.16939905285835266, 'timestamp': '2025-09-10 02:34:27.863301', 'step': 1466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:27.917948', 'step': 1466, 'epoch': 1} {'type': 'loss', 'content': 0.1448277235031128, 'timestamp': '2025-09-10 02:34:27.919957', 'step': 1467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:27.974776', 'step': 1467, 'epoch': 1} {'type': 'loss', 'content': 0.24777033925056458, 'timestamp': '2025-09-10 02:34:27.980938', 'step': 1468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:28.034462', 'step': 1468, 'epoch': 1} {'type': 'loss', 'content': 0.29773640632629395, 'timestamp': '2025-09-10 02:34:28.036540', 'step': 1469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:28.091832', 'step': 1469, 'epoch': 1} {'type': 'loss', 'content': 0.24712234735488892, 'timestamp': '2025-09-10 02:34:28.093646', 'step': 1470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:28.147357', 'step': 1470, 'epoch': 1} {'type': 'loss', 'content': 0.14748869836330414, 'timestamp': '2025-09-10 02:34:28.149449', 'step': 1471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:28.202537', 'step': 1471, 'epoch': 1} {'type': 'loss', 'content': 0.13824652135372162, 'timestamp': '2025-09-10 02:34:28.208632', 'step': 1472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:28.263068', 'step': 1472, 'epoch': 1} {'type': 'loss', 'content': 0.14484262466430664, 'timestamp': '2025-09-10 02:34:28.266131', 'step': 1473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:28.321141', 'step': 1473, 'epoch': 1} {'type': 'loss', 'content': 0.19894984364509583, 'timestamp': '2025-09-10 02:34:28.323363', 'step': 1474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:28.378978', 'step': 1474, 'epoch': 1} {'type': 'loss', 'content': 0.13691702485084534, 'timestamp': '2025-09-10 02:34:28.380971', 'step': 1475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:28.437258', 'step': 1475, 'epoch': 1} {'type': 'loss', 'content': 0.1668364554643631, 'timestamp': '2025-09-10 02:34:28.444079', 'step': 1476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:28.500462', 'step': 1476, 'epoch': 1} {'type': 'loss', 'content': 0.2378956824541092, 'timestamp': '2025-09-10 02:34:28.502511', 'step': 1477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:28.562243', 'step': 1477, 'epoch': 1} {'type': 'loss', 'content': 0.07987246662378311, 'timestamp': '2025-09-10 02:34:28.564346', 'step': 1478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:28.621782', 'step': 1478, 'epoch': 1} {'type': 'loss', 'content': 0.24885395169258118, 'timestamp': '2025-09-10 02:34:28.623848', 'step': 1479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:28.682859', 'step': 1479, 'epoch': 1} {'type': 'loss', 'content': 0.1960984617471695, 'timestamp': '2025-09-10 02:34:28.689816', 'step': 1480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:28.743239', 'step': 1480, 'epoch': 1} {'type': 'loss', 'content': 0.11455456167459488, 'timestamp': '2025-09-10 02:34:28.745525', 'step': 1481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:28.800832', 'step': 1481, 'epoch': 1} {'type': 'loss', 'content': 0.19049568474292755, 'timestamp': '2025-09-10 02:34:28.803110', 'step': 1482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:28.862136', 'step': 1482, 'epoch': 1} {'type': 'loss', 'content': 0.1877991259098053, 'timestamp': '2025-09-10 02:34:28.865972', 'step': 1483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:28.922899', 'step': 1483, 'epoch': 1} {'type': 'loss', 'content': 0.1455410271883011, 'timestamp': '2025-09-10 02:34:28.929068', 'step': 1484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:28.984012', 'step': 1484, 'epoch': 1} {'type': 'loss', 'content': 0.2462531179189682, 'timestamp': '2025-09-10 02:34:28.986188', 'step': 1485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:29.041988', 'step': 1485, 'epoch': 1} {'type': 'loss', 'content': 0.19389238953590393, 'timestamp': '2025-09-10 02:34:29.044017', 'step': 1486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:29.098744', 'step': 1486, 'epoch': 1} {'type': 'loss', 'content': 0.13761572539806366, 'timestamp': '2025-09-10 02:34:29.101103', 'step': 1487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:29.159612', 'step': 1487, 'epoch': 1} {'type': 'loss', 'content': 0.17063480615615845, 'timestamp': '2025-09-10 02:34:29.165851', 'step': 1488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:29.220225', 'step': 1488, 'epoch': 1} {'type': 'loss', 'content': 0.2029617428779602, 'timestamp': '2025-09-10 02:34:29.222309', 'step': 1489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:29.276670', 'step': 1489, 'epoch': 1} {'type': 'loss', 'content': 0.2158571481704712, 'timestamp': '2025-09-10 02:34:29.279455', 'step': 1490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:29.335000', 'step': 1490, 'epoch': 1} {'type': 'loss', 'content': 0.1840190440416336, 'timestamp': '2025-09-10 02:34:29.337182', 'step': 1491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:29.400211', 'step': 1491, 'epoch': 1} {'type': 'loss', 'content': 0.18694022297859192, 'timestamp': '2025-09-10 02:34:29.406497', 'step': 1492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:29.459132', 'step': 1492, 'epoch': 1} {'type': 'loss', 'content': 0.2444974184036255, 'timestamp': '2025-09-10 02:34:29.461355', 'step': 1493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:29.514586', 'step': 1493, 'epoch': 1} {'type': 'loss', 'content': 0.15605711936950684, 'timestamp': '2025-09-10 02:34:29.516853', 'step': 1494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:29.570657', 'step': 1494, 'epoch': 1} {'type': 'loss', 'content': 0.2113220989704132, 'timestamp': '2025-09-10 02:34:29.572727', 'step': 1495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:29.626593', 'step': 1495, 'epoch': 1} {'type': 'loss', 'content': 0.3255558907985687, 'timestamp': '2025-09-10 02:34:29.634221', 'step': 1496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:29.687020', 'step': 1496, 'epoch': 1} {'type': 'loss', 'content': 0.1816779226064682, 'timestamp': '2025-09-10 02:34:29.689036', 'step': 1497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:29.742929', 'step': 1497, 'epoch': 1} {'type': 'loss', 'content': 0.14402668178081512, 'timestamp': '2025-09-10 02:34:29.745027', 'step': 1498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:34:29.799777', 'step': 1498, 'epoch': 1} {'type': 'loss', 'content': 0.11284022778272629, 'timestamp': '2025-09-10 02:34:29.801944', 'step': 1499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:29.856170', 'step': 1499, 'epoch': 1} {'type': 'loss', 'content': 0.20837514102458954, 'timestamp': '2025-09-10 02:34:29.862600', 'step': 1500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 1500', 'timestamp': '2025-09-10 02:34:30.414525', 'step': 1500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:30.473179', 'step': 1500, 'epoch': 1} {'type': 'loss', 'content': 0.21696285903453827, 'timestamp': '2025-09-10 02:34:30.475302', 'step': 1501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:30.529356', 'step': 1501, 'epoch': 1} {'type': 'loss', 'content': 0.30352583527565, 'timestamp': '2025-09-10 02:34:30.531449', 'step': 1502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:30.585144', 'step': 1502, 'epoch': 1} {'type': 'loss', 'content': 0.18799906969070435, 'timestamp': '2025-09-10 02:34:30.587433', 'step': 1503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:30.641415', 'step': 1503, 'epoch': 1} {'type': 'loss', 'content': 0.1412578970193863, 'timestamp': '2025-09-10 02:34:30.647494', 'step': 1504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:30.699984', 'step': 1504, 'epoch': 1} {'type': 'loss', 'content': 0.18960201740264893, 'timestamp': '2025-09-10 02:34:30.701943', 'step': 1505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:30.757618', 'step': 1505, 'epoch': 1} {'type': 'loss', 'content': 0.19365940988063812, 'timestamp': '2025-09-10 02:34:30.759570', 'step': 1506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:30.812762', 'step': 1506, 'epoch': 1} {'type': 'loss', 'content': 0.17869533598423004, 'timestamp': '2025-09-10 02:34:30.814866', 'step': 1507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:30.867801', 'step': 1507, 'epoch': 1} {'type': 'loss', 'content': 0.1702333390712738, 'timestamp': '2025-09-10 02:34:30.873748', 'step': 1508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:30.925595', 'step': 1508, 'epoch': 1} {'type': 'loss', 'content': 0.1733769029378891, 'timestamp': '2025-09-10 02:34:30.927556', 'step': 1509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:30.980179', 'step': 1509, 'epoch': 1} {'type': 'loss', 'content': 0.206522598862648, 'timestamp': '2025-09-10 02:34:30.982403', 'step': 1510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:34:31.035613', 'step': 1510, 'epoch': 1} {'type': 'loss', 'content': 0.12931454181671143, 'timestamp': '2025-09-10 02:34:31.037608', 'step': 1511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:31.090237', 'step': 1511, 'epoch': 1} {'type': 'loss', 'content': 0.16736498475074768, 'timestamp': '2025-09-10 02:34:31.096681', 'step': 1512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:31.149853', 'step': 1512, 'epoch': 1} {'type': 'loss', 'content': 0.1817881464958191, 'timestamp': '2025-09-10 02:34:31.152092', 'step': 1513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:31.206599', 'step': 1513, 'epoch': 1} {'type': 'loss', 'content': 0.36805570125579834, 'timestamp': '2025-09-10 02:34:31.208871', 'step': 1514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:34:31.262698', 'step': 1514, 'epoch': 1} {'type': 'loss', 'content': 0.2522675693035126, 'timestamp': '2025-09-10 02:34:31.265678', 'step': 1515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:31.319063', 'step': 1515, 'epoch': 1} {'type': 'loss', 'content': 0.20218674838542938, 'timestamp': '2025-09-10 02:34:31.325518', 'step': 1516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:31.378671', 'step': 1516, 'epoch': 1} {'type': 'loss', 'content': 0.15051916241645813, 'timestamp': '2025-09-10 02:34:31.382066', 'step': 1517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:31.435304', 'step': 1517, 'epoch': 1} {'type': 'loss', 'content': 0.2026928812265396, 'timestamp': '2025-09-10 02:34:31.437493', 'step': 1518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:31.492102', 'step': 1518, 'epoch': 1} {'type': 'loss', 'content': 0.3356002867221832, 'timestamp': '2025-09-10 02:34:31.494117', 'step': 1519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:31.546722', 'step': 1519, 'epoch': 1} {'type': 'loss', 'content': 0.10944835841655731, 'timestamp': '2025-09-10 02:34:31.552740', 'step': 1520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:31.604696', 'step': 1520, 'epoch': 1} {'type': 'loss', 'content': 0.07359690964221954, 'timestamp': '2025-09-10 02:34:31.606672', 'step': 1521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:31.666805', 'step': 1521, 'epoch': 1} {'type': 'loss', 'content': 0.19520334899425507, 'timestamp': '2025-09-10 02:34:31.669094', 'step': 1522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:31.723184', 'step': 1522, 'epoch': 1} {'type': 'loss', 'content': 0.18606974184513092, 'timestamp': '2025-09-10 02:34:31.725355', 'step': 1523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:31.777866', 'step': 1523, 'epoch': 1} {'type': 'loss', 'content': 0.18773046135902405, 'timestamp': '2025-09-10 02:34:31.783990', 'step': 1524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:31.836316', 'step': 1524, 'epoch': 1} {'type': 'loss', 'content': 0.21152999997138977, 'timestamp': '2025-09-10 02:34:31.838292', 'step': 1525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:31.890632', 'step': 1525, 'epoch': 1} {'type': 'loss', 'content': 0.2058148980140686, 'timestamp': '2025-09-10 02:34:31.892589', 'step': 1526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:31.945778', 'step': 1526, 'epoch': 1} {'type': 'loss', 'content': 0.2451835572719574, 'timestamp': '2025-09-10 02:34:31.947758', 'step': 1527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:32.000558', 'step': 1527, 'epoch': 1} {'type': 'loss', 'content': 0.11859972029924393, 'timestamp': '2025-09-10 02:34:32.006502', 'step': 1528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:32.058603', 'step': 1528, 'epoch': 1} {'type': 'loss', 'content': 0.2049039602279663, 'timestamp': '2025-09-10 02:34:32.060537', 'step': 1529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:32.113429', 'step': 1529, 'epoch': 1} {'type': 'loss', 'content': 0.16005536913871765, 'timestamp': '2025-09-10 02:34:32.115565', 'step': 1530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:32.169578', 'step': 1530, 'epoch': 1} {'type': 'loss', 'content': 0.13595028221607208, 'timestamp': '2025-09-10 02:34:32.171862', 'step': 1531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:32.229218', 'step': 1531, 'epoch': 1} {'type': 'loss', 'content': 0.22304821014404297, 'timestamp': '2025-09-10 02:34:32.235647', 'step': 1532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:32.289233', 'step': 1532, 'epoch': 1} {'type': 'loss', 'content': 0.15885744988918304, 'timestamp': '2025-09-10 02:34:32.291324', 'step': 1533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:32.346534', 'step': 1533, 'epoch': 1} {'type': 'loss', 'content': 0.17767758667469025, 'timestamp': '2025-09-10 02:34:32.348507', 'step': 1534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:32.403484', 'step': 1534, 'epoch': 1} {'type': 'loss', 'content': 0.2536170780658722, 'timestamp': '2025-09-10 02:34:32.405475', 'step': 1535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:32.460152', 'step': 1535, 'epoch': 1} {'type': 'loss', 'content': 0.11386964470148087, 'timestamp': '2025-09-10 02:34:32.466329', 'step': 1536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:32.518742', 'step': 1536, 'epoch': 1} {'type': 'loss', 'content': 0.13693560659885406, 'timestamp': '2025-09-10 02:34:32.520710', 'step': 1537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:32.572828', 'step': 1537, 'epoch': 1} {'type': 'loss', 'content': 0.283549040555954, 'timestamp': '2025-09-10 02:34:32.574864', 'step': 1538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:32.627797', 'step': 1538, 'epoch': 1} {'type': 'loss', 'content': 0.22521908581256866, 'timestamp': '2025-09-10 02:34:32.629831', 'step': 1539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:32.682535', 'step': 1539, 'epoch': 1} {'type': 'loss', 'content': 0.19630536437034607, 'timestamp': '2025-09-10 02:34:32.688416', 'step': 1540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:32.740997', 'step': 1540, 'epoch': 1} {'type': 'loss', 'content': 0.2539031505584717, 'timestamp': '2025-09-10 02:34:32.743048', 'step': 1541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:32.795112', 'step': 1541, 'epoch': 1} {'type': 'loss', 'content': 0.22977329790592194, 'timestamp': '2025-09-10 02:34:32.797185', 'step': 1542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:32.850149', 'step': 1542, 'epoch': 1} {'type': 'loss', 'content': 0.11084478348493576, 'timestamp': '2025-09-10 02:34:32.852092', 'step': 1543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:32.904695', 'step': 1543, 'epoch': 1} {'type': 'loss', 'content': 0.11967580020427704, 'timestamp': '2025-09-10 02:34:32.910541', 'step': 1544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:32.963139', 'step': 1544, 'epoch': 1} {'type': 'loss', 'content': 0.32059618830680847, 'timestamp': '2025-09-10 02:34:32.965210', 'step': 1545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:33.018299', 'step': 1545, 'epoch': 1} {'type': 'loss', 'content': 0.14703190326690674, 'timestamp': '2025-09-10 02:34:33.020397', 'step': 1546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:33.073227', 'step': 1546, 'epoch': 1} {'type': 'loss', 'content': 0.10751300305128098, 'timestamp': '2025-09-10 02:34:33.075218', 'step': 1547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:33.127692', 'step': 1547, 'epoch': 1} {'type': 'loss', 'content': 0.21905292570590973, 'timestamp': '2025-09-10 02:34:33.133498', 'step': 1548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:33.185473', 'step': 1548, 'epoch': 1} {'type': 'loss', 'content': 0.19237667322158813, 'timestamp': '2025-09-10 02:34:33.187471', 'step': 1549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:33.239687', 'step': 1549, 'epoch': 1} {'type': 'loss', 'content': 0.1203780472278595, 'timestamp': '2025-09-10 02:34:33.241813', 'step': 1550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:33.306652', 'step': 1550, 'epoch': 1} {'type': 'loss', 'content': 0.14202433824539185, 'timestamp': '2025-09-10 02:34:33.308672', 'step': 1551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:33.361739', 'step': 1551, 'epoch': 1} {'type': 'loss', 'content': 0.10980695486068726, 'timestamp': '2025-09-10 02:34:33.367749', 'step': 1552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:33.421692', 'step': 1552, 'epoch': 1} {'type': 'loss', 'content': 0.16663259267807007, 'timestamp': '2025-09-10 02:34:33.423691', 'step': 1553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:33.478743', 'step': 1553, 'epoch': 1} {'type': 'loss', 'content': 0.17551426589488983, 'timestamp': '2025-09-10 02:34:33.480746', 'step': 1554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:33.534199', 'step': 1554, 'epoch': 1} {'type': 'loss', 'content': 0.28581419587135315, 'timestamp': '2025-09-10 02:34:33.536365', 'step': 1555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:33.602359', 'step': 1555, 'epoch': 1} {'type': 'loss', 'content': 0.1728542596101761, 'timestamp': '2025-09-10 02:34:33.608467', 'step': 1556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:33.673673', 'step': 1556, 'epoch': 1} {'type': 'loss', 'content': 0.19413959980010986, 'timestamp': '2025-09-10 02:34:33.675655', 'step': 1557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:33.729224', 'step': 1557, 'epoch': 1} {'type': 'loss', 'content': 0.14831671118736267, 'timestamp': '2025-09-10 02:34:33.732768', 'step': 1558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:33.788250', 'step': 1558, 'epoch': 1} {'type': 'loss', 'content': 0.18657223880290985, 'timestamp': '2025-09-10 02:34:33.790547', 'step': 1559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:33.844734', 'step': 1559, 'epoch': 1} {'type': 'loss', 'content': 0.21516244113445282, 'timestamp': '2025-09-10 02:34:33.851194', 'step': 1560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:33.904825', 'step': 1560, 'epoch': 1} {'type': 'loss', 'content': 0.22383874654769897, 'timestamp': '2025-09-10 02:34:33.906920', 'step': 1561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:33.960006', 'step': 1561, 'epoch': 1} {'type': 'loss', 'content': 0.17657245695590973, 'timestamp': '2025-09-10 02:34:33.962056', 'step': 1562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:34.021464', 'step': 1562, 'epoch': 1} {'type': 'loss', 'content': 0.2136809080839157, 'timestamp': '2025-09-10 02:34:34.026908', 'step': 1563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:34.086604', 'step': 1563, 'epoch': 1} {'type': 'loss', 'content': 0.1639854907989502, 'timestamp': '2025-09-10 02:34:34.096466', 'step': 1564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:34.154388', 'step': 1564, 'epoch': 1} {'type': 'loss', 'content': 0.16838932037353516, 'timestamp': '2025-09-10 02:34:34.159606', 'step': 1565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:34.213083', 'step': 1565, 'epoch': 1} {'type': 'loss', 'content': 0.25362908840179443, 'timestamp': '2025-09-10 02:34:34.215302', 'step': 1566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:34.268405', 'step': 1566, 'epoch': 1} {'type': 'loss', 'content': 0.10614466667175293, 'timestamp': '2025-09-10 02:34:34.270540', 'step': 1567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:34.323675', 'step': 1567, 'epoch': 1} {'type': 'loss', 'content': 0.2855784296989441, 'timestamp': '2025-09-10 02:34:34.330134', 'step': 1568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:34.384354', 'step': 1568, 'epoch': 1} {'type': 'loss', 'content': 0.14798817038536072, 'timestamp': '2025-09-10 02:34:34.386355', 'step': 1569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:34.440163', 'step': 1569, 'epoch': 1} {'type': 'loss', 'content': 0.18944358825683594, 'timestamp': '2025-09-10 02:34:34.442691', 'step': 1570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:34.496224', 'step': 1570, 'epoch': 1} {'type': 'loss', 'content': 0.1359698623418808, 'timestamp': '2025-09-10 02:34:34.498458', 'step': 1571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:34.553218', 'step': 1571, 'epoch': 1} {'type': 'loss', 'content': 0.318032443523407, 'timestamp': '2025-09-10 02:34:34.559626', 'step': 1572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:34.612699', 'step': 1572, 'epoch': 1} {'type': 'loss', 'content': 0.1505068987607956, 'timestamp': '2025-09-10 02:34:34.614816', 'step': 1573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:34:34.667943', 'step': 1573, 'epoch': 1} {'type': 'loss', 'content': 0.09811975806951523, 'timestamp': '2025-09-10 02:34:34.670205', 'step': 1574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:34.723315', 'step': 1574, 'epoch': 1} {'type': 'loss', 'content': 0.1837185174226761, 'timestamp': '2025-09-10 02:34:34.725540', 'step': 1575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:34.780229', 'step': 1575, 'epoch': 1} {'type': 'loss', 'content': 0.2103128880262375, 'timestamp': '2025-09-10 02:34:34.786195', 'step': 1576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:34.839101', 'step': 1576, 'epoch': 1} {'type': 'loss', 'content': 0.19603846967220306, 'timestamp': '2025-09-10 02:34:34.841453', 'step': 1577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:34.895775', 'step': 1577, 'epoch': 1} {'type': 'loss', 'content': 0.12499301880598068, 'timestamp': '2025-09-10 02:34:34.898061', 'step': 1578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:34.952106', 'step': 1578, 'epoch': 1} {'type': 'loss', 'content': 0.1791592538356781, 'timestamp': '2025-09-10 02:34:34.954454', 'step': 1579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:35.007418', 'step': 1579, 'epoch': 1} {'type': 'loss', 'content': 0.1948510706424713, 'timestamp': '2025-09-10 02:34:35.013682', 'step': 1580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:34:35.065577', 'step': 1580, 'epoch': 1} {'type': 'loss', 'content': 0.15745769441127777, 'timestamp': '2025-09-10 02:34:35.067736', 'step': 1581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:35.120324', 'step': 1581, 'epoch': 1} {'type': 'loss', 'content': 0.20041532814502716, 'timestamp': '2025-09-10 02:34:35.122507', 'step': 1582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:35.176235', 'step': 1582, 'epoch': 1} {'type': 'loss', 'content': 0.22642216086387634, 'timestamp': '2025-09-10 02:34:35.178396', 'step': 1583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:35.234054', 'step': 1583, 'epoch': 1} {'type': 'loss', 'content': 0.18295595049858093, 'timestamp': '2025-09-10 02:34:35.240847', 'step': 1584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:35.296975', 'step': 1584, 'epoch': 1} {'type': 'loss', 'content': 0.18870775401592255, 'timestamp': '2025-09-10 02:34:35.299187', 'step': 1585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:35.353005', 'step': 1585, 'epoch': 1} {'type': 'loss', 'content': 0.18060746788978577, 'timestamp': '2025-09-10 02:34:35.355315', 'step': 1586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:35.410937', 'step': 1586, 'epoch': 1} {'type': 'loss', 'content': 0.26819655299186707, 'timestamp': '2025-09-10 02:34:35.412985', 'step': 1587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:35.470236', 'step': 1587, 'epoch': 1} {'type': 'loss', 'content': 0.16545329988002777, 'timestamp': '2025-09-10 02:34:35.476905', 'step': 1588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:35.532594', 'step': 1588, 'epoch': 1} {'type': 'loss', 'content': 0.14025716483592987, 'timestamp': '2025-09-10 02:34:35.534962', 'step': 1589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:35.592850', 'step': 1589, 'epoch': 1} {'type': 'loss', 'content': 0.20917631685733795, 'timestamp': '2025-09-10 02:34:35.594937', 'step': 1590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:35.653543', 'step': 1590, 'epoch': 1} {'type': 'loss', 'content': 0.1974421739578247, 'timestamp': '2025-09-10 02:34:35.655654', 'step': 1591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:35.713084', 'step': 1591, 'epoch': 1} {'type': 'loss', 'content': 0.23752902448177338, 'timestamp': '2025-09-10 02:34:35.720044', 'step': 1592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:35.776606', 'step': 1592, 'epoch': 1} {'type': 'loss', 'content': 0.15931133925914764, 'timestamp': '2025-09-10 02:34:35.778755', 'step': 1593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:35.835784', 'step': 1593, 'epoch': 1} {'type': 'loss', 'content': 0.31459230184555054, 'timestamp': '2025-09-10 02:34:35.838087', 'step': 1594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:35.894521', 'step': 1594, 'epoch': 1} {'type': 'loss', 'content': 0.14593516290187836, 'timestamp': '2025-09-10 02:34:35.896906', 'step': 1595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:35.954698', 'step': 1595, 'epoch': 1} {'type': 'loss', 'content': 0.23480357229709625, 'timestamp': '2025-09-10 02:34:35.961380', 'step': 1596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:36.017050', 'step': 1596, 'epoch': 1} {'type': 'loss', 'content': 0.19756458699703217, 'timestamp': '2025-09-10 02:34:36.019138', 'step': 1597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:36.075924', 'step': 1597, 'epoch': 1} {'type': 'loss', 'content': 0.15725310146808624, 'timestamp': '2025-09-10 02:34:36.078094', 'step': 1598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:36.133182', 'step': 1598, 'epoch': 1} {'type': 'loss', 'content': 0.199063241481781, 'timestamp': '2025-09-10 02:34:36.135485', 'step': 1599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:36.190260', 'step': 1599, 'epoch': 1} {'type': 'loss', 'content': 0.20042020082473755, 'timestamp': '2025-09-10 02:34:36.196818', 'step': 1600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:36.250766', 'step': 1600, 'epoch': 1} {'type': 'loss', 'content': 0.15635667741298676, 'timestamp': '2025-09-10 02:34:36.252738', 'step': 1601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:36.306548', 'step': 1601, 'epoch': 1} {'type': 'loss', 'content': 0.16584020853042603, 'timestamp': '2025-09-10 02:34:36.308694', 'step': 1602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:34:36.363325', 'step': 1602, 'epoch': 1} {'type': 'loss', 'content': 0.15057338774204254, 'timestamp': '2025-09-10 02:34:36.365543', 'step': 1603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:36.419779', 'step': 1603, 'epoch': 1} {'type': 'loss', 'content': 0.16101758182048798, 'timestamp': '2025-09-10 02:34:36.426657', 'step': 1604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:36.482364', 'step': 1604, 'epoch': 1} {'type': 'loss', 'content': 0.1364528387784958, 'timestamp': '2025-09-10 02:34:36.484413', 'step': 1605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:36.540258', 'step': 1605, 'epoch': 1} {'type': 'loss', 'content': 0.22301265597343445, 'timestamp': '2025-09-10 02:34:36.542447', 'step': 1606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:36.597370', 'step': 1606, 'epoch': 1} {'type': 'loss', 'content': 0.09092050045728683, 'timestamp': '2025-09-10 02:34:36.599625', 'step': 1607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:36.653715', 'step': 1607, 'epoch': 1} {'type': 'loss', 'content': 0.18263565003871918, 'timestamp': '2025-09-10 02:34:36.660068', 'step': 1608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:36.713973', 'step': 1608, 'epoch': 1} {'type': 'loss', 'content': 0.1878952533006668, 'timestamp': '2025-09-10 02:34:36.715958', 'step': 1609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:36.770152', 'step': 1609, 'epoch': 1} {'type': 'loss', 'content': 0.20481175184249878, 'timestamp': '2025-09-10 02:34:36.772186', 'step': 1610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:36.826950', 'step': 1610, 'epoch': 1} {'type': 'loss', 'content': 0.1922454535961151, 'timestamp': '2025-09-10 02:34:36.829018', 'step': 1611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:36.885497', 'step': 1611, 'epoch': 1} {'type': 'loss', 'content': 0.2200532853603363, 'timestamp': '2025-09-10 02:34:36.891965', 'step': 1612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:36.948249', 'step': 1612, 'epoch': 1} {'type': 'loss', 'content': 0.15329010784626007, 'timestamp': '2025-09-10 02:34:36.950599', 'step': 1613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:37.007706', 'step': 1613, 'epoch': 1} {'type': 'loss', 'content': 0.16330961883068085, 'timestamp': '2025-09-10 02:34:37.010322', 'step': 1614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:37.067532', 'step': 1614, 'epoch': 1} {'type': 'loss', 'content': 0.31593918800354004, 'timestamp': '2025-09-10 02:34:37.069743', 'step': 1615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:37.125781', 'step': 1615, 'epoch': 1} {'type': 'loss', 'content': 0.15250886976718903, 'timestamp': '2025-09-10 02:34:37.132705', 'step': 1616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:37.189492', 'step': 1616, 'epoch': 1} {'type': 'loss', 'content': 0.21656623482704163, 'timestamp': '2025-09-10 02:34:37.191755', 'step': 1617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:37.249274', 'step': 1617, 'epoch': 1} {'type': 'loss', 'content': 0.18935894966125488, 'timestamp': '2025-09-10 02:34:37.251832', 'step': 1618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:37.310025', 'step': 1618, 'epoch': 1} {'type': 'loss', 'content': 0.18039381504058838, 'timestamp': '2025-09-10 02:34:37.312245', 'step': 1619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:37.369699', 'step': 1619, 'epoch': 1} {'type': 'loss', 'content': 0.15325625240802765, 'timestamp': '2025-09-10 02:34:37.376584', 'step': 1620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:37.432977', 'step': 1620, 'epoch': 1} {'type': 'loss', 'content': 0.2432800829410553, 'timestamp': '2025-09-10 02:34:37.435031', 'step': 1621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:37.491129', 'step': 1621, 'epoch': 1} {'type': 'loss', 'content': 0.19200782477855682, 'timestamp': '2025-09-10 02:34:37.493456', 'step': 1622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:37.550901', 'step': 1622, 'epoch': 1} {'type': 'loss', 'content': 0.1888517588376999, 'timestamp': '2025-09-10 02:34:37.553072', 'step': 1623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:37.610125', 'step': 1623, 'epoch': 1} {'type': 'loss', 'content': 0.3588774800300598, 'timestamp': '2025-09-10 02:34:37.616882', 'step': 1624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:37.673257', 'step': 1624, 'epoch': 1} {'type': 'loss', 'content': 0.16921114921569824, 'timestamp': '2025-09-10 02:34:37.675333', 'step': 1625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:37.730721', 'step': 1625, 'epoch': 1} {'type': 'loss', 'content': 0.098018117249012, 'timestamp': '2025-09-10 02:34:37.732763', 'step': 1626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:37.787279', 'step': 1626, 'epoch': 1} {'type': 'loss', 'content': 0.13790178298950195, 'timestamp': '2025-09-10 02:34:37.789405', 'step': 1627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:37.843121', 'step': 1627, 'epoch': 1} {'type': 'loss', 'content': 0.127920463681221, 'timestamp': '2025-09-10 02:34:37.849510', 'step': 1628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:37.902263', 'step': 1628, 'epoch': 1} {'type': 'loss', 'content': 0.16496562957763672, 'timestamp': '2025-09-10 02:34:37.904481', 'step': 1629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:34:37.959581', 'step': 1629, 'epoch': 1} {'type': 'loss', 'content': 0.16741275787353516, 'timestamp': '2025-09-10 02:34:37.961743', 'step': 1630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:38.014724', 'step': 1630, 'epoch': 1} {'type': 'loss', 'content': 0.1297004222869873, 'timestamp': '2025-09-10 02:34:38.017030', 'step': 1631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:38.070544', 'step': 1631, 'epoch': 1} {'type': 'loss', 'content': 0.18600691854953766, 'timestamp': '2025-09-10 02:34:38.076965', 'step': 1632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:38.131971', 'step': 1632, 'epoch': 1} {'type': 'loss', 'content': 0.17499218881130219, 'timestamp': '2025-09-10 02:34:38.134436', 'step': 1633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:38.193144', 'step': 1633, 'epoch': 1} {'type': 'loss', 'content': 0.16125789284706116, 'timestamp': '2025-09-10 02:34:38.195566', 'step': 1634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:38.256313', 'step': 1634, 'epoch': 1} {'type': 'loss', 'content': 0.17966048419475555, 'timestamp': '2025-09-10 02:34:38.258585', 'step': 1635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:38.315404', 'step': 1635, 'epoch': 1} {'type': 'loss', 'content': 0.17313432693481445, 'timestamp': '2025-09-10 02:34:38.326281', 'step': 1636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:38.385663', 'step': 1636, 'epoch': 1} {'type': 'loss', 'content': 0.24588951468467712, 'timestamp': '2025-09-10 02:34:38.387836', 'step': 1637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:38.444116', 'step': 1637, 'epoch': 1} {'type': 'loss', 'content': 0.3009982407093048, 'timestamp': '2025-09-10 02:34:38.446420', 'step': 1638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:38.500768', 'step': 1638, 'epoch': 1} {'type': 'loss', 'content': 0.25027668476104736, 'timestamp': '2025-09-10 02:34:38.503023', 'step': 1639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:38.558728', 'step': 1639, 'epoch': 1} {'type': 'loss', 'content': 0.22018270194530487, 'timestamp': '2025-09-10 02:34:38.565355', 'step': 1640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:38.620592', 'step': 1640, 'epoch': 1} {'type': 'loss', 'content': 0.19613836705684662, 'timestamp': '2025-09-10 02:34:38.622940', 'step': 1641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:38.679593', 'step': 1641, 'epoch': 1} {'type': 'loss', 'content': 0.1649337112903595, 'timestamp': '2025-09-10 02:34:38.681745', 'step': 1642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:38.737738', 'step': 1642, 'epoch': 1} {'type': 'loss', 'content': 0.1702379584312439, 'timestamp': '2025-09-10 02:34:38.740079', 'step': 1643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:38.802018', 'step': 1643, 'epoch': 1} {'type': 'loss', 'content': 0.26083460450172424, 'timestamp': '2025-09-10 02:34:38.808873', 'step': 1644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:38.865787', 'step': 1644, 'epoch': 1} {'type': 'loss', 'content': 0.1063104197382927, 'timestamp': '2025-09-10 02:34:38.868113', 'step': 1645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:38.923641', 'step': 1645, 'epoch': 1} {'type': 'loss', 'content': 0.11904440075159073, 'timestamp': '2025-09-10 02:34:38.926190', 'step': 1646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:38.982061', 'step': 1646, 'epoch': 1} {'type': 'loss', 'content': 0.14068713784217834, 'timestamp': '2025-09-10 02:34:38.984292', 'step': 1647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:39.039524', 'step': 1647, 'epoch': 1} {'type': 'loss', 'content': 0.1680920571088791, 'timestamp': '2025-09-10 02:34:39.045874', 'step': 1648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:39.099893', 'step': 1648, 'epoch': 1} {'type': 'loss', 'content': 0.1657353788614273, 'timestamp': '2025-09-10 02:34:39.102366', 'step': 1649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:39.158216', 'step': 1649, 'epoch': 1} {'type': 'loss', 'content': 0.19477228820323944, 'timestamp': '2025-09-10 02:34:39.160510', 'step': 1650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:39.215872', 'step': 1650, 'epoch': 1} {'type': 'loss', 'content': 0.18438845872879028, 'timestamp': '2025-09-10 02:34:39.218072', 'step': 1651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:39.280408', 'step': 1651, 'epoch': 1} {'type': 'loss', 'content': 0.18550041317939758, 'timestamp': '2025-09-10 02:34:39.286799', 'step': 1652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:39.340410', 'step': 1652, 'epoch': 1} {'type': 'loss', 'content': 0.1820819228887558, 'timestamp': '2025-09-10 02:34:39.342580', 'step': 1653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:39.395920', 'step': 1653, 'epoch': 1} {'type': 'loss', 'content': 0.13420651853084564, 'timestamp': '2025-09-10 02:34:39.398111', 'step': 1654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:39.452112', 'step': 1654, 'epoch': 1} {'type': 'loss', 'content': 0.229567289352417, 'timestamp': '2025-09-10 02:34:39.454420', 'step': 1655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:39.508605', 'step': 1655, 'epoch': 1} {'type': 'loss', 'content': 0.3119477927684784, 'timestamp': '2025-09-10 02:34:39.515047', 'step': 1656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:39.569017', 'step': 1656, 'epoch': 1} {'type': 'loss', 'content': 0.1731695681810379, 'timestamp': '2025-09-10 02:34:39.571309', 'step': 1657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:39.625448', 'step': 1657, 'epoch': 1} {'type': 'loss', 'content': 0.1258441060781479, 'timestamp': '2025-09-10 02:34:39.627513', 'step': 1658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:39.682842', 'step': 1658, 'epoch': 1} {'type': 'loss', 'content': 0.15656211972236633, 'timestamp': '2025-09-10 02:34:39.685093', 'step': 1659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:39.743017', 'step': 1659, 'epoch': 1} {'type': 'loss', 'content': 0.15263597667217255, 'timestamp': '2025-09-10 02:34:39.749852', 'step': 1660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:34:39.806731', 'step': 1660, 'epoch': 1} {'type': 'loss', 'content': 0.1607038378715515, 'timestamp': '2025-09-10 02:34:39.809160', 'step': 1661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:39.867835', 'step': 1661, 'epoch': 1} {'type': 'loss', 'content': 0.19568683207035065, 'timestamp': '2025-09-10 02:34:39.870336', 'step': 1662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:39.928082', 'step': 1662, 'epoch': 1} {'type': 'loss', 'content': 0.13754211366176605, 'timestamp': '2025-09-10 02:34:39.930398', 'step': 1663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:39.988560', 'step': 1663, 'epoch': 1} {'type': 'loss', 'content': 0.1287572681903839, 'timestamp': '2025-09-10 02:34:39.995831', 'step': 1664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:40.053748', 'step': 1664, 'epoch': 1} {'type': 'loss', 'content': 0.14817595481872559, 'timestamp': '2025-09-10 02:34:40.056169', 'step': 1665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:40.115327', 'step': 1665, 'epoch': 1} {'type': 'loss', 'content': 0.15209431946277618, 'timestamp': '2025-09-10 02:34:40.117736', 'step': 1666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:40.176171', 'step': 1666, 'epoch': 1} {'type': 'loss', 'content': 0.19699306786060333, 'timestamp': '2025-09-10 02:34:40.178763', 'step': 1667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:40.238780', 'step': 1667, 'epoch': 1} {'type': 'loss', 'content': 0.2132243514060974, 'timestamp': '2025-09-10 02:34:40.246114', 'step': 1668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:40.303091', 'step': 1668, 'epoch': 1} {'type': 'loss', 'content': 0.12055326998233795, 'timestamp': '2025-09-10 02:34:40.305341', 'step': 1669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:40.361539', 'step': 1669, 'epoch': 1} {'type': 'loss', 'content': 0.22553853690624237, 'timestamp': '2025-09-10 02:34:40.363786', 'step': 1670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:40.421222', 'step': 1670, 'epoch': 1} {'type': 'loss', 'content': 0.1380789577960968, 'timestamp': '2025-09-10 02:34:40.423306', 'step': 1671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:40.478622', 'step': 1671, 'epoch': 1} {'type': 'loss', 'content': 0.2091488391160965, 'timestamp': '2025-09-10 02:34:40.485281', 'step': 1672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:40.539454', 'step': 1672, 'epoch': 1} {'type': 'loss', 'content': 0.18319900333881378, 'timestamp': '2025-09-10 02:34:40.541485', 'step': 1673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:40.595557', 'step': 1673, 'epoch': 1} {'type': 'loss', 'content': 0.15281367301940918, 'timestamp': '2025-09-10 02:34:40.597693', 'step': 1674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:40.656764', 'step': 1674, 'epoch': 1} {'type': 'loss', 'content': 0.21410685777664185, 'timestamp': '2025-09-10 02:34:40.659510', 'step': 1675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:40.720735', 'step': 1675, 'epoch': 1} {'type': 'loss', 'content': 0.18369513750076294, 'timestamp': '2025-09-10 02:34:40.728571', 'step': 1676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:40.790625', 'step': 1676, 'epoch': 1} {'type': 'loss', 'content': 0.12048395723104477, 'timestamp': '2025-09-10 02:34:40.793222', 'step': 1677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:40.853568', 'step': 1677, 'epoch': 1} {'type': 'loss', 'content': 0.15275806188583374, 'timestamp': '2025-09-10 02:34:40.856155', 'step': 1678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:40.917468', 'step': 1678, 'epoch': 1} {'type': 'loss', 'content': 0.15260209143161774, 'timestamp': '2025-09-10 02:34:40.919989', 'step': 1679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:40.977230', 'step': 1679, 'epoch': 1} {'type': 'loss', 'content': 0.1864410638809204, 'timestamp': '2025-09-10 02:34:40.983974', 'step': 1680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:41.039569', 'step': 1680, 'epoch': 1} {'type': 'loss', 'content': 0.13073788583278656, 'timestamp': '2025-09-10 02:34:41.041875', 'step': 1681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:41.096979', 'step': 1681, 'epoch': 1} {'type': 'loss', 'content': 0.1390557885169983, 'timestamp': '2025-09-10 02:34:41.100071', 'step': 1682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:34:41.155539', 'step': 1682, 'epoch': 1} {'type': 'loss', 'content': 0.1463257223367691, 'timestamp': '2025-09-10 02:34:41.157845', 'step': 1683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:41.212218', 'step': 1683, 'epoch': 1} {'type': 'loss', 'content': 0.1841537058353424, 'timestamp': '2025-09-10 02:34:41.218416', 'step': 1684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:41.272856', 'step': 1684, 'epoch': 1} {'type': 'loss', 'content': 0.3050093650817871, 'timestamp': '2025-09-10 02:34:41.275316', 'step': 1685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:41.332121', 'step': 1685, 'epoch': 1} {'type': 'loss', 'content': 0.21999837458133698, 'timestamp': '2025-09-10 02:34:41.334458', 'step': 1686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:41.390360', 'step': 1686, 'epoch': 1} {'type': 'loss', 'content': 0.25631168484687805, 'timestamp': '2025-09-10 02:34:41.392718', 'step': 1687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:41.449783', 'step': 1687, 'epoch': 1} {'type': 'loss', 'content': 0.11526577174663544, 'timestamp': '2025-09-10 02:34:41.456592', 'step': 1688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:41.511160', 'step': 1688, 'epoch': 1} {'type': 'loss', 'content': 0.1628880351781845, 'timestamp': '2025-09-10 02:34:41.513700', 'step': 1689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:41.569052', 'step': 1689, 'epoch': 1} {'type': 'loss', 'content': 0.2777441143989563, 'timestamp': '2025-09-10 02:34:41.571433', 'step': 1690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:34:41.625218', 'step': 1690, 'epoch': 1} {'type': 'loss', 'content': 0.29418453574180603, 'timestamp': '2025-09-10 02:34:41.627629', 'step': 1691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:41.682323', 'step': 1691, 'epoch': 1} {'type': 'loss', 'content': 0.14617611467838287, 'timestamp': '2025-09-10 02:34:41.688590', 'step': 1692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:41.742006', 'step': 1692, 'epoch': 1} {'type': 'loss', 'content': 0.2614787518978119, 'timestamp': '2025-09-10 02:34:41.744191', 'step': 1693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:41.798423', 'step': 1693, 'epoch': 1} {'type': 'loss', 'content': 0.15088506042957306, 'timestamp': '2025-09-10 02:34:41.800712', 'step': 1694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:41.853846', 'step': 1694, 'epoch': 1} {'type': 'loss', 'content': 0.13217224180698395, 'timestamp': '2025-09-10 02:34:41.855960', 'step': 1695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:41.908993', 'step': 1695, 'epoch': 1} {'type': 'loss', 'content': 0.14051900804042816, 'timestamp': '2025-09-10 02:34:41.915159', 'step': 1696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:41.968973', 'step': 1696, 'epoch': 1} {'type': 'loss', 'content': 0.2258928120136261, 'timestamp': '2025-09-10 02:34:41.971153', 'step': 1697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:42.024109', 'step': 1697, 'epoch': 1} {'type': 'loss', 'content': 0.1875530630350113, 'timestamp': '2025-09-10 02:34:42.026340', 'step': 1698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:42.079729', 'step': 1698, 'epoch': 1} {'type': 'loss', 'content': 0.1656734198331833, 'timestamp': '2025-09-10 02:34:42.081951', 'step': 1699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:42.134683', 'step': 1699, 'epoch': 1} {'type': 'loss', 'content': 0.20075081288814545, 'timestamp': '2025-09-10 02:34:42.140726', 'step': 1700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:34:42.192953', 'step': 1700, 'epoch': 1} {'type': 'loss', 'content': 0.10827746242284775, 'timestamp': '2025-09-10 02:34:42.195164', 'step': 1701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:42.247814', 'step': 1701, 'epoch': 1} {'type': 'loss', 'content': 0.15269215404987335, 'timestamp': '2025-09-10 02:34:42.250230', 'step': 1702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:42.304265', 'step': 1702, 'epoch': 1} {'type': 'loss', 'content': 0.14453735947608948, 'timestamp': '2025-09-10 02:34:42.306525', 'step': 1703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:42.360937', 'step': 1703, 'epoch': 1} {'type': 'loss', 'content': 0.2141013890504837, 'timestamp': '2025-09-10 02:34:42.367322', 'step': 1704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:42.420852', 'step': 1704, 'epoch': 1} {'type': 'loss', 'content': 0.24212603271007538, 'timestamp': '2025-09-10 02:34:42.423375', 'step': 1705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:42.476756', 'step': 1705, 'epoch': 1} {'type': 'loss', 'content': 0.15056131780147552, 'timestamp': '2025-09-10 02:34:42.479472', 'step': 1706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:42.532813', 'step': 1706, 'epoch': 1} {'type': 'loss', 'content': 0.23810304701328278, 'timestamp': '2025-09-10 02:34:42.534948', 'step': 1707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:42.588574', 'step': 1707, 'epoch': 1} {'type': 'loss', 'content': 0.1506997048854828, 'timestamp': '2025-09-10 02:34:42.594871', 'step': 1708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:42.647467', 'step': 1708, 'epoch': 1} {'type': 'loss', 'content': 0.14345724880695343, 'timestamp': '2025-09-10 02:34:42.649670', 'step': 1709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:42.702482', 'step': 1709, 'epoch': 1} {'type': 'loss', 'content': 0.22288021445274353, 'timestamp': '2025-09-10 02:34:42.704830', 'step': 1710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:34:42.759302', 'step': 1710, 'epoch': 1} {'type': 'loss', 'content': 0.12027601897716522, 'timestamp': '2025-09-10 02:34:42.761342', 'step': 1711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:42.814099', 'step': 1711, 'epoch': 1} {'type': 'loss', 'content': 0.1895124912261963, 'timestamp': '2025-09-10 02:34:42.820206', 'step': 1712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:42.872848', 'step': 1712, 'epoch': 1} {'type': 'loss', 'content': 0.17300929129123688, 'timestamp': '2025-09-10 02:34:42.875091', 'step': 1713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:42.929311', 'step': 1713, 'epoch': 1} {'type': 'loss', 'content': 0.2719428837299347, 'timestamp': '2025-09-10 02:34:42.931626', 'step': 1714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:42.984807', 'step': 1714, 'epoch': 1} {'type': 'loss', 'content': 0.2785530090332031, 'timestamp': '2025-09-10 02:34:42.987030', 'step': 1715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:43.040124', 'step': 1715, 'epoch': 1} {'type': 'loss', 'content': 0.16342155635356903, 'timestamp': '2025-09-10 02:34:43.046462', 'step': 1716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:43.100137', 'step': 1716, 'epoch': 1} {'type': 'loss', 'content': 0.13881677389144897, 'timestamp': '2025-09-10 02:34:43.102280', 'step': 1717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:43.156727', 'step': 1717, 'epoch': 1} {'type': 'loss', 'content': 0.1571035534143448, 'timestamp': '2025-09-10 02:34:43.159024', 'step': 1718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:43.212428', 'step': 1718, 'epoch': 1} {'type': 'loss', 'content': 0.3456909656524658, 'timestamp': '2025-09-10 02:34:43.214692', 'step': 1719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:43.269135', 'step': 1719, 'epoch': 1} {'type': 'loss', 'content': 0.13385728001594543, 'timestamp': '2025-09-10 02:34:43.275408', 'step': 1720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:43.328495', 'step': 1720, 'epoch': 1} {'type': 'loss', 'content': 0.18143263459205627, 'timestamp': '2025-09-10 02:34:43.330493', 'step': 1721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:43.383584', 'step': 1721, 'epoch': 1} {'type': 'loss', 'content': 0.18454797565937042, 'timestamp': '2025-09-10 02:34:43.385756', 'step': 1722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:43.439317', 'step': 1722, 'epoch': 1} {'type': 'loss', 'content': 0.19538824260234833, 'timestamp': '2025-09-10 02:34:43.441376', 'step': 1723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:43.494711', 'step': 1723, 'epoch': 1} {'type': 'loss', 'content': 0.2079411894083023, 'timestamp': '2025-09-10 02:34:43.500917', 'step': 1724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:43.554303', 'step': 1724, 'epoch': 1} {'type': 'loss', 'content': 0.10825066268444061, 'timestamp': '2025-09-10 02:34:43.556562', 'step': 1725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:43.610472', 'step': 1725, 'epoch': 1} {'type': 'loss', 'content': 0.17726895213127136, 'timestamp': '2025-09-10 02:34:43.612716', 'step': 1726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:43.665875', 'step': 1726, 'epoch': 1} {'type': 'loss', 'content': 0.18882820010185242, 'timestamp': '2025-09-10 02:34:43.668093', 'step': 1727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:43.721072', 'step': 1727, 'epoch': 1} {'type': 'loss', 'content': 0.1036553829908371, 'timestamp': '2025-09-10 02:34:43.727168', 'step': 1728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:43.779078', 'step': 1728, 'epoch': 1} {'type': 'loss', 'content': 0.25172314047813416, 'timestamp': '2025-09-10 02:34:43.781136', 'step': 1729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:43.833768', 'step': 1729, 'epoch': 1} {'type': 'loss', 'content': 0.1940319687128067, 'timestamp': '2025-09-10 02:34:43.836194', 'step': 1730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:43.889232', 'step': 1730, 'epoch': 1} {'type': 'loss', 'content': 0.10007455199956894, 'timestamp': '2025-09-10 02:34:43.891471', 'step': 1731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:43.945010', 'step': 1731, 'epoch': 1} {'type': 'loss', 'content': 0.1302260458469391, 'timestamp': '2025-09-10 02:34:43.951596', 'step': 1732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:34:44.008228', 'step': 1732, 'epoch': 1} {'type': 'loss', 'content': 0.2767462432384491, 'timestamp': '2025-09-10 02:34:44.010364', 'step': 1733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:44.063404', 'step': 1733, 'epoch': 1} {'type': 'loss', 'content': 0.20204874873161316, 'timestamp': '2025-09-10 02:34:44.065580', 'step': 1734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:44.119898', 'step': 1734, 'epoch': 1} {'type': 'loss', 'content': 0.193618506193161, 'timestamp': '2025-09-10 02:34:44.121859', 'step': 1735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:44.174556', 'step': 1735, 'epoch': 1} {'type': 'loss', 'content': 0.1949945092201233, 'timestamp': '2025-09-10 02:34:44.180330', 'step': 1736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:44.232916', 'step': 1736, 'epoch': 1} {'type': 'loss', 'content': 0.14183494448661804, 'timestamp': '2025-09-10 02:34:44.234916', 'step': 1737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:44.287122', 'step': 1737, 'epoch': 1} {'type': 'loss', 'content': 0.22923734784126282, 'timestamp': '2025-09-10 02:34:44.289101', 'step': 1738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:44.341647', 'step': 1738, 'epoch': 1} {'type': 'loss', 'content': 0.19678634405136108, 'timestamp': '2025-09-10 02:34:44.343641', 'step': 1739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:44.396797', 'step': 1739, 'epoch': 1} {'type': 'loss', 'content': 0.2324250042438507, 'timestamp': '2025-09-10 02:34:44.402473', 'step': 1740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:44.455206', 'step': 1740, 'epoch': 1} {'type': 'loss', 'content': 0.1726357787847519, 'timestamp': '2025-09-10 02:34:44.457077', 'step': 1741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:44.510168', 'step': 1741, 'epoch': 1} {'type': 'loss', 'content': 0.17215003073215485, 'timestamp': '2025-09-10 02:34:44.512193', 'step': 1742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:44.566860', 'step': 1742, 'epoch': 1} {'type': 'loss', 'content': 0.19400228559970856, 'timestamp': '2025-09-10 02:34:44.568912', 'step': 1743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:44.622625', 'step': 1743, 'epoch': 1} {'type': 'loss', 'content': 0.16942337155342102, 'timestamp': '2025-09-10 02:34:44.628458', 'step': 1744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:44.680800', 'step': 1744, 'epoch': 1} {'type': 'loss', 'content': 0.1748734712600708, 'timestamp': '2025-09-10 02:34:44.683036', 'step': 1745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:44.736200', 'step': 1745, 'epoch': 1} {'type': 'loss', 'content': 0.1529131978750229, 'timestamp': '2025-09-10 02:34:44.738430', 'step': 1746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:44.791053', 'step': 1746, 'epoch': 1} {'type': 'loss', 'content': 0.13929897546768188, 'timestamp': '2025-09-10 02:34:44.793415', 'step': 1747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:44.845721', 'step': 1747, 'epoch': 1} {'type': 'loss', 'content': 0.19889651238918304, 'timestamp': '2025-09-10 02:34:44.851867', 'step': 1748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:44.904412', 'step': 1748, 'epoch': 1} {'type': 'loss', 'content': 0.18358679115772247, 'timestamp': '2025-09-10 02:34:44.906379', 'step': 1749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:44.958862', 'step': 1749, 'epoch': 1} {'type': 'loss', 'content': 0.1505943089723587, 'timestamp': '2025-09-10 02:34:44.960991', 'step': 1750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:45.014052', 'step': 1750, 'epoch': 1} {'type': 'loss', 'content': 0.14491094648838043, 'timestamp': '2025-09-10 02:34:45.016402', 'step': 1751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:45.070142', 'step': 1751, 'epoch': 1} {'type': 'loss', 'content': 0.26632389426231384, 'timestamp': '2025-09-10 02:34:45.076816', 'step': 1752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:45.130674', 'step': 1752, 'epoch': 1} {'type': 'loss', 'content': 0.1810043454170227, 'timestamp': '2025-09-10 02:34:45.132603', 'step': 1753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:45.185889', 'step': 1753, 'epoch': 1} {'type': 'loss', 'content': 0.1949019432067871, 'timestamp': '2025-09-10 02:34:45.187912', 'step': 1754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:45.242880', 'step': 1754, 'epoch': 1} {'type': 'loss', 'content': 0.10665576905012131, 'timestamp': '2025-09-10 02:34:45.245019', 'step': 1755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:45.298929', 'step': 1755, 'epoch': 1} {'type': 'loss', 'content': 0.10834620147943497, 'timestamp': '2025-09-10 02:34:45.305028', 'step': 1756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:45.357756', 'step': 1756, 'epoch': 1} {'type': 'loss', 'content': 0.13416068255901337, 'timestamp': '2025-09-10 02:34:45.360000', 'step': 1757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:45.413352', 'step': 1757, 'epoch': 1} {'type': 'loss', 'content': 0.23407886922359467, 'timestamp': '2025-09-10 02:34:45.415705', 'step': 1758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:45.469010', 'step': 1758, 'epoch': 1} {'type': 'loss', 'content': 0.14162075519561768, 'timestamp': '2025-09-10 02:34:45.471209', 'step': 1759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:45.524948', 'step': 1759, 'epoch': 1} {'type': 'loss', 'content': 0.11879343539476395, 'timestamp': '2025-09-10 02:34:45.530898', 'step': 1760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:45.584600', 'step': 1760, 'epoch': 1} {'type': 'loss', 'content': 0.13066460192203522, 'timestamp': '2025-09-10 02:34:45.586825', 'step': 1761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:45.641482', 'step': 1761, 'epoch': 1} {'type': 'loss', 'content': 0.1755286306142807, 'timestamp': '2025-09-10 02:34:45.643854', 'step': 1762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:45.697866', 'step': 1762, 'epoch': 1} {'type': 'loss', 'content': 0.17744801938533783, 'timestamp': '2025-09-10 02:34:45.700200', 'step': 1763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:45.752778', 'step': 1763, 'epoch': 1} {'type': 'loss', 'content': 0.22598977386951447, 'timestamp': '2025-09-10 02:34:45.758829', 'step': 1764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:45.811128', 'step': 1764, 'epoch': 1} {'type': 'loss', 'content': 0.13679611682891846, 'timestamp': '2025-09-10 02:34:45.813269', 'step': 1765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:34:45.866288', 'step': 1765, 'epoch': 1} {'type': 'loss', 'content': 0.13383427262306213, 'timestamp': '2025-09-10 02:34:45.868481', 'step': 1766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:45.921581', 'step': 1766, 'epoch': 1} {'type': 'loss', 'content': 0.1274641901254654, 'timestamp': '2025-09-10 02:34:45.923680', 'step': 1767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:45.976576', 'step': 1767, 'epoch': 1} {'type': 'loss', 'content': 0.16876095533370972, 'timestamp': '2025-09-10 02:34:45.982513', 'step': 1768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:46.035092', 'step': 1768, 'epoch': 1} {'type': 'loss', 'content': 0.12984468042850494, 'timestamp': '2025-09-10 02:34:46.037374', 'step': 1769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:46.092791', 'step': 1769, 'epoch': 1} {'type': 'loss', 'content': 0.20296040177345276, 'timestamp': '2025-09-10 02:34:46.094982', 'step': 1770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:46.148585', 'step': 1770, 'epoch': 1} {'type': 'loss', 'content': 0.14888311922550201, 'timestamp': '2025-09-10 02:34:46.150908', 'step': 1771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:46.204158', 'step': 1771, 'epoch': 1} {'type': 'loss', 'content': 0.219247505068779, 'timestamp': '2025-09-10 02:34:46.210294', 'step': 1772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:46.263413', 'step': 1772, 'epoch': 1} {'type': 'loss', 'content': 0.2338819056749344, 'timestamp': '2025-09-10 02:34:46.265583', 'step': 1773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:46.318529', 'step': 1773, 'epoch': 1} {'type': 'loss', 'content': 0.25072863698005676, 'timestamp': '2025-09-10 02:34:46.320745', 'step': 1774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:46.374064', 'step': 1774, 'epoch': 1} {'type': 'loss', 'content': 0.21500073373317719, 'timestamp': '2025-09-10 02:34:46.376405', 'step': 1775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:46.429975', 'step': 1775, 'epoch': 1} {'type': 'loss', 'content': 0.16418185830116272, 'timestamp': '2025-09-10 02:34:46.435856', 'step': 1776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:46.488158', 'step': 1776, 'epoch': 1} {'type': 'loss', 'content': 0.23433533310890198, 'timestamp': '2025-09-10 02:34:46.491300', 'step': 1777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:46.543517', 'step': 1777, 'epoch': 1} {'type': 'loss', 'content': 0.198159858584404, 'timestamp': '2025-09-10 02:34:46.545822', 'step': 1778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:46.598961', 'step': 1778, 'epoch': 1} {'type': 'loss', 'content': 0.21230915188789368, 'timestamp': '2025-09-10 02:34:46.601185', 'step': 1779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:46.654332', 'step': 1779, 'epoch': 1} {'type': 'loss', 'content': 0.19794930517673492, 'timestamp': '2025-09-10 02:34:46.660508', 'step': 1780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:46.714139', 'step': 1780, 'epoch': 1} {'type': 'loss', 'content': 0.29309573769569397, 'timestamp': '2025-09-10 02:34:46.716313', 'step': 1781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:46.768919', 'step': 1781, 'epoch': 1} {'type': 'loss', 'content': 0.16841894388198853, 'timestamp': '2025-09-10 02:34:46.771185', 'step': 1782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:46.824652', 'step': 1782, 'epoch': 1} {'type': 'loss', 'content': 0.16762909293174744, 'timestamp': '2025-09-10 02:34:46.827007', 'step': 1783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:46.880095', 'step': 1783, 'epoch': 1} {'type': 'loss', 'content': 0.22364027798175812, 'timestamp': '2025-09-10 02:34:46.886272', 'step': 1784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:46.938971', 'step': 1784, 'epoch': 1} {'type': 'loss', 'content': 0.16515633463859558, 'timestamp': '2025-09-10 02:34:46.941024', 'step': 1785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:46.993558', 'step': 1785, 'epoch': 1} {'type': 'loss', 'content': 0.08741644769906998, 'timestamp': '2025-09-10 02:34:46.995718', 'step': 1786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:47.048601', 'step': 1786, 'epoch': 1} {'type': 'loss', 'content': 0.16650839149951935, 'timestamp': '2025-09-10 02:34:47.050896', 'step': 1787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:47.105297', 'step': 1787, 'epoch': 1} {'type': 'loss', 'content': 0.1342858225107193, 'timestamp': '2025-09-10 02:34:47.111379', 'step': 1788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:47.164647', 'step': 1788, 'epoch': 1} {'type': 'loss', 'content': 0.20640596747398376, 'timestamp': '2025-09-10 02:34:47.166938', 'step': 1789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:47.221392', 'step': 1789, 'epoch': 1} {'type': 'loss', 'content': 0.21029090881347656, 'timestamp': '2025-09-10 02:34:47.223623', 'step': 1790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:47.277068', 'step': 1790, 'epoch': 1} {'type': 'loss', 'content': 0.15648376941680908, 'timestamp': '2025-09-10 02:34:47.279393', 'step': 1791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:47.333971', 'step': 1791, 'epoch': 1} {'type': 'loss', 'content': 0.2114896923303604, 'timestamp': '2025-09-10 02:34:47.340269', 'step': 1792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:47.392661', 'step': 1792, 'epoch': 1} {'type': 'loss', 'content': 0.16071060299873352, 'timestamp': '2025-09-10 02:34:47.394818', 'step': 1793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:47.447838', 'step': 1793, 'epoch': 1} {'type': 'loss', 'content': 0.2344500720500946, 'timestamp': '2025-09-10 02:34:47.449987', 'step': 1794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:47.502842', 'step': 1794, 'epoch': 1} {'type': 'loss', 'content': 0.13741731643676758, 'timestamp': '2025-09-10 02:34:47.505056', 'step': 1795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:47.557549', 'step': 1795, 'epoch': 1} {'type': 'loss', 'content': 0.1952408403158188, 'timestamp': '2025-09-10 02:34:47.563474', 'step': 1796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:47.615954', 'step': 1796, 'epoch': 1} {'type': 'loss', 'content': 0.2795052230358124, 'timestamp': '2025-09-10 02:34:47.618197', 'step': 1797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:34:47.672636', 'step': 1797, 'epoch': 1} {'type': 'loss', 'content': 0.2877664268016815, 'timestamp': '2025-09-10 02:34:47.674818', 'step': 1798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:47.730166', 'step': 1798, 'epoch': 1} {'type': 'loss', 'content': 0.17463108897209167, 'timestamp': '2025-09-10 02:34:47.732542', 'step': 1799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:47.787266', 'step': 1799, 'epoch': 1} {'type': 'loss', 'content': 0.1473289430141449, 'timestamp': '2025-09-10 02:34:47.793600', 'step': 1800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:47.847291', 'step': 1800, 'epoch': 1} {'type': 'loss', 'content': 0.17783191800117493, 'timestamp': '2025-09-10 02:34:47.849459', 'step': 1801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:47.904063', 'step': 1801, 'epoch': 1} {'type': 'loss', 'content': 0.10489054024219513, 'timestamp': '2025-09-10 02:34:47.906359', 'step': 1802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:47.960619', 'step': 1802, 'epoch': 1} {'type': 'loss', 'content': 0.14052829146385193, 'timestamp': '2025-09-10 02:34:47.962880', 'step': 1803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:48.016192', 'step': 1803, 'epoch': 1} {'type': 'loss', 'content': 0.1546352207660675, 'timestamp': '2025-09-10 02:34:48.022403', 'step': 1804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:48.075787', 'step': 1804, 'epoch': 1} {'type': 'loss', 'content': 0.18328720331192017, 'timestamp': '2025-09-10 02:34:48.078122', 'step': 1805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:48.131857', 'step': 1805, 'epoch': 1} {'type': 'loss', 'content': 0.20278556644916534, 'timestamp': '2025-09-10 02:34:48.134301', 'step': 1806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:48.188305', 'step': 1806, 'epoch': 1} {'type': 'loss', 'content': 0.1635398417711258, 'timestamp': '2025-09-10 02:34:48.190484', 'step': 1807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:48.243642', 'step': 1807, 'epoch': 1} {'type': 'loss', 'content': 0.24309884011745453, 'timestamp': '2025-09-10 02:34:48.249854', 'step': 1808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:48.302827', 'step': 1808, 'epoch': 1} {'type': 'loss', 'content': 0.23603172600269318, 'timestamp': '2025-09-10 02:34:48.305284', 'step': 1809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:34:48.358344', 'step': 1809, 'epoch': 1} {'type': 'loss', 'content': 0.18805614113807678, 'timestamp': '2025-09-10 02:34:48.360615', 'step': 1810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:48.413417', 'step': 1810, 'epoch': 1} {'type': 'loss', 'content': 0.14977866411209106, 'timestamp': '2025-09-10 02:34:48.415704', 'step': 1811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:48.469489', 'step': 1811, 'epoch': 1} {'type': 'loss', 'content': 0.20722520351409912, 'timestamp': '2025-09-10 02:34:48.475444', 'step': 1812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:48.527153', 'step': 1812, 'epoch': 1} {'type': 'loss', 'content': 0.16431652009487152, 'timestamp': '2025-09-10 02:34:48.529605', 'step': 1813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:34:48.582773', 'step': 1813, 'epoch': 1} {'type': 'loss', 'content': 0.12714028358459473, 'timestamp': '2025-09-10 02:34:48.584828', 'step': 1814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:48.637677', 'step': 1814, 'epoch': 1} {'type': 'loss', 'content': 0.15213049948215485, 'timestamp': '2025-09-10 02:34:48.639763', 'step': 1815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:48.692112', 'step': 1815, 'epoch': 1} {'type': 'loss', 'content': 0.22702647745609283, 'timestamp': '2025-09-10 02:34:48.698233', 'step': 1816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:48.751335', 'step': 1816, 'epoch': 1} {'type': 'loss', 'content': 0.15837299823760986, 'timestamp': '2025-09-10 02:34:48.753465', 'step': 1817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:48.807576', 'step': 1817, 'epoch': 1} {'type': 'loss', 'content': 0.0943395346403122, 'timestamp': '2025-09-10 02:34:48.809769', 'step': 1818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:48.865527', 'step': 1818, 'epoch': 1} {'type': 'loss', 'content': 0.16359665989875793, 'timestamp': '2025-09-10 02:34:48.867540', 'step': 1819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:48.920997', 'step': 1819, 'epoch': 1} {'type': 'loss', 'content': 0.21870329976081848, 'timestamp': '2025-09-10 02:34:48.927242', 'step': 1820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:48.980404', 'step': 1820, 'epoch': 1} {'type': 'loss', 'content': 0.15145449340343475, 'timestamp': '2025-09-10 02:34:48.982688', 'step': 1821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:49.034999', 'step': 1821, 'epoch': 1} {'type': 'loss', 'content': 0.16635999083518982, 'timestamp': '2025-09-10 02:34:49.037033', 'step': 1822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:49.090955', 'step': 1822, 'epoch': 1} {'type': 'loss', 'content': 0.12798961997032166, 'timestamp': '2025-09-10 02:34:49.093129', 'step': 1823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:49.145117', 'step': 1823, 'epoch': 1} {'type': 'loss', 'content': 0.13934771716594696, 'timestamp': '2025-09-10 02:34:49.150856', 'step': 1824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:49.203083', 'step': 1824, 'epoch': 1} {'type': 'loss', 'content': 0.22157001495361328, 'timestamp': '2025-09-10 02:34:49.205121', 'step': 1825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:49.257953', 'step': 1825, 'epoch': 1} {'type': 'loss', 'content': 0.18467991054058075, 'timestamp': '2025-09-10 02:34:49.260094', 'step': 1826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:49.312997', 'step': 1826, 'epoch': 1} {'type': 'loss', 'content': 0.1838645339012146, 'timestamp': '2025-09-10 02:34:49.314961', 'step': 1827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:49.369107', 'step': 1827, 'epoch': 1} {'type': 'loss', 'content': 0.10394159704446793, 'timestamp': '2025-09-10 02:34:49.374976', 'step': 1828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:49.427375', 'step': 1828, 'epoch': 1} {'type': 'loss', 'content': 0.15946218371391296, 'timestamp': '2025-09-10 02:34:49.429187', 'step': 1829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:49.481814', 'step': 1829, 'epoch': 1} {'type': 'loss', 'content': 0.07893367111682892, 'timestamp': '2025-09-10 02:34:49.484014', 'step': 1830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:49.537199', 'step': 1830, 'epoch': 1} {'type': 'loss', 'content': 0.1704379916191101, 'timestamp': '2025-09-10 02:34:49.539020', 'step': 1831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:49.591526', 'step': 1831, 'epoch': 1} {'type': 'loss', 'content': 0.18164941668510437, 'timestamp': '2025-09-10 02:34:49.597142', 'step': 1832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:49.649322', 'step': 1832, 'epoch': 1} {'type': 'loss', 'content': 0.2289074957370758, 'timestamp': '2025-09-10 02:34:49.651114', 'step': 1833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:49.703741', 'step': 1833, 'epoch': 1} {'type': 'loss', 'content': 0.21841974556446075, 'timestamp': '2025-09-10 02:34:49.705758', 'step': 1834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:49.758627', 'step': 1834, 'epoch': 1} {'type': 'loss', 'content': 0.1490427702665329, 'timestamp': '2025-09-10 02:34:49.760678', 'step': 1835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:49.813234', 'step': 1835, 'epoch': 1} {'type': 'loss', 'content': 0.2018498033285141, 'timestamp': '2025-09-10 02:34:49.820131', 'step': 1836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:49.873610', 'step': 1836, 'epoch': 1} {'type': 'loss', 'content': 0.1173369362950325, 'timestamp': '2025-09-10 02:34:49.875850', 'step': 1837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:34:49.928907', 'step': 1837, 'epoch': 1} {'type': 'loss', 'content': 0.21703585982322693, 'timestamp': '2025-09-10 02:34:49.931025', 'step': 1838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:49.983632', 'step': 1838, 'epoch': 1} {'type': 'loss', 'content': 0.16747742891311646, 'timestamp': '2025-09-10 02:34:49.985774', 'step': 1839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:50.038268', 'step': 1839, 'epoch': 1} {'type': 'loss', 'content': 0.2253676950931549, 'timestamp': '2025-09-10 02:34:50.043885', 'step': 1840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:50.096345', 'step': 1840, 'epoch': 1} {'type': 'loss', 'content': 0.1457301527261734, 'timestamp': '2025-09-10 02:34:50.098304', 'step': 1841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:50.150801', 'step': 1841, 'epoch': 1} {'type': 'loss', 'content': 0.16160094738006592, 'timestamp': '2025-09-10 02:34:50.152837', 'step': 1842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:50.207091', 'step': 1842, 'epoch': 1} {'type': 'loss', 'content': 0.16579024493694305, 'timestamp': '2025-09-10 02:34:50.209139', 'step': 1843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:50.262054', 'step': 1843, 'epoch': 1} {'type': 'loss', 'content': 0.20486973226070404, 'timestamp': '2025-09-10 02:34:50.267768', 'step': 1844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:50.319752', 'step': 1844, 'epoch': 1} {'type': 'loss', 'content': 0.16379767656326294, 'timestamp': '2025-09-10 02:34:50.321801', 'step': 1845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:50.375566', 'step': 1845, 'epoch': 1} {'type': 'loss', 'content': 0.2059287279844284, 'timestamp': '2025-09-10 02:34:50.377696', 'step': 1846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:50.433219', 'step': 1846, 'epoch': 1} {'type': 'loss', 'content': 0.14334169030189514, 'timestamp': '2025-09-10 02:34:50.435366', 'step': 1847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:50.494074', 'step': 1847, 'epoch': 1} {'type': 'loss', 'content': 0.187997967004776, 'timestamp': '2025-09-10 02:34:50.500527', 'step': 1848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:50.553917', 'step': 1848, 'epoch': 1} {'type': 'loss', 'content': 0.1998005211353302, 'timestamp': '2025-09-10 02:34:50.556137', 'step': 1849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:50.610176', 'step': 1849, 'epoch': 1} {'type': 'loss', 'content': 0.16954904794692993, 'timestamp': '2025-09-10 02:34:50.612451', 'step': 1850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:50.665551', 'step': 1850, 'epoch': 1} {'type': 'loss', 'content': 0.16610634326934814, 'timestamp': '2025-09-10 02:34:50.667688', 'step': 1851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:50.720230', 'step': 1851, 'epoch': 1} {'type': 'loss', 'content': 0.19034606218338013, 'timestamp': '2025-09-10 02:34:50.726241', 'step': 1852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:50.778690', 'step': 1852, 'epoch': 1} {'type': 'loss', 'content': 0.24447010457515717, 'timestamp': '2025-09-10 02:34:50.780758', 'step': 1853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:50.833129', 'step': 1853, 'epoch': 1} {'type': 'loss', 'content': 0.14317883551120758, 'timestamp': '2025-09-10 02:34:50.835024', 'step': 1854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:50.896728', 'step': 1854, 'epoch': 1} {'type': 'loss', 'content': 0.16180890798568726, 'timestamp': '2025-09-10 02:34:50.898550', 'step': 1855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:50.951461', 'step': 1855, 'epoch': 1} {'type': 'loss', 'content': 0.13854849338531494, 'timestamp': '2025-09-10 02:34:50.957240', 'step': 1856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:51.009993', 'step': 1856, 'epoch': 1} {'type': 'loss', 'content': 0.1478039175271988, 'timestamp': '2025-09-10 02:34:51.011973', 'step': 1857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:34:51.064520', 'step': 1857, 'epoch': 1} {'type': 'loss', 'content': 0.08901144564151764, 'timestamp': '2025-09-10 02:34:51.066365', 'step': 1858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:51.119634', 'step': 1858, 'epoch': 1} {'type': 'loss', 'content': 0.1027192696928978, 'timestamp': '2025-09-10 02:34:51.121509', 'step': 1859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:51.173837', 'step': 1859, 'epoch': 1} {'type': 'loss', 'content': 0.1363086849451065, 'timestamp': '2025-09-10 02:34:51.179679', 'step': 1860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:34:51.232503', 'step': 1860, 'epoch': 1} {'type': 'loss', 'content': 0.15190139412879944, 'timestamp': '2025-09-10 02:34:51.234450', 'step': 1861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:51.288494', 'step': 1861, 'epoch': 1} {'type': 'loss', 'content': 0.18685391545295715, 'timestamp': '2025-09-10 02:34:51.292450', 'step': 1862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:51.349507', 'step': 1862, 'epoch': 1} {'type': 'loss', 'content': 0.19079166650772095, 'timestamp': '2025-09-10 02:34:51.351707', 'step': 1863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:34:51.405267', 'step': 1863, 'epoch': 1} {'type': 'loss', 'content': 0.31850385665893555, 'timestamp': '2025-09-10 02:34:51.411145', 'step': 1864, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:35:04.191193', 'step': 1864, 'epoch': 1} {'type': 'pplx', 'content': 11412.753485260842, 'timestamp': '2025-09-10 02:35:04.194308', 'step': 1864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:04.248501', 'step': 1864, 'epoch': 1} {'type': 'loss', 'content': 0.1839197874069214, 'timestamp': '2025-09-10 02:35:04.250455', 'step': 1865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:04.304285', 'step': 1865, 'epoch': 1} {'type': 'loss', 'content': 0.2584443986415863, 'timestamp': '2025-09-10 02:35:04.306056', 'step': 1866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:04.359303', 'step': 1866, 'epoch': 1} {'type': 'loss', 'content': 0.09188114106655121, 'timestamp': '2025-09-10 02:35:04.361466', 'step': 1867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:04.415330', 'step': 1867, 'epoch': 1} {'type': 'loss', 'content': 0.2706260085105896, 'timestamp': '2025-09-10 02:35:04.421116', 'step': 1868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:04.477562', 'step': 1868, 'epoch': 1} {'type': 'loss', 'content': 0.12357824295759201, 'timestamp': '2025-09-10 02:35:04.479358', 'step': 1869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:04.534518', 'step': 1869, 'epoch': 1} {'type': 'loss', 'content': 0.18413645029067993, 'timestamp': '2025-09-10 02:35:04.536376', 'step': 1870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:04.591546', 'step': 1870, 'epoch': 1} {'type': 'loss', 'content': 0.21831834316253662, 'timestamp': '2025-09-10 02:35:04.593370', 'step': 1871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:04.647312', 'step': 1871, 'epoch': 1} {'type': 'loss', 'content': 0.1676599085330963, 'timestamp': '2025-09-10 02:35:04.653733', 'step': 1872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:04.707966', 'step': 1872, 'epoch': 1} {'type': 'loss', 'content': 0.24232342839241028, 'timestamp': '2025-09-10 02:35:04.710232', 'step': 1873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:04.763838', 'step': 1873, 'epoch': 1} {'type': 'loss', 'content': 0.13413873314857483, 'timestamp': '2025-09-10 02:35:04.766026', 'step': 1874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:04.819591', 'step': 1874, 'epoch': 1} {'type': 'loss', 'content': 0.18151399493217468, 'timestamp': '2025-09-10 02:35:04.821880', 'step': 1875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:04.875618', 'step': 1875, 'epoch': 1} {'type': 'loss', 'content': 0.14039695262908936, 'timestamp': '2025-09-10 02:35:04.881834', 'step': 1876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:04.934973', 'step': 1876, 'epoch': 1} {'type': 'loss', 'content': 0.19751814007759094, 'timestamp': '2025-09-10 02:35:04.937354', 'step': 1877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:04.990703', 'step': 1877, 'epoch': 1} {'type': 'loss', 'content': 0.13859476149082184, 'timestamp': '2025-09-10 02:35:04.992987', 'step': 1878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:05.046814', 'step': 1878, 'epoch': 1} {'type': 'loss', 'content': 0.12348785996437073, 'timestamp': '2025-09-10 02:35:05.049764', 'step': 1879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:05.103881', 'step': 1879, 'epoch': 1} {'type': 'loss', 'content': 0.19849790632724762, 'timestamp': '2025-09-10 02:35:05.110201', 'step': 1880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:05.163803', 'step': 1880, 'epoch': 1} {'type': 'loss', 'content': 0.21898576617240906, 'timestamp': '2025-09-10 02:35:05.165995', 'step': 1881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:05.220414', 'step': 1881, 'epoch': 1} {'type': 'loss', 'content': 0.10125318169593811, 'timestamp': '2025-09-10 02:35:05.222701', 'step': 1882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:05.276467', 'step': 1882, 'epoch': 1} {'type': 'loss', 'content': 0.1854504495859146, 'timestamp': '2025-09-10 02:35:05.278698', 'step': 1883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:05.332077', 'step': 1883, 'epoch': 1} {'type': 'loss', 'content': 0.15906447172164917, 'timestamp': '2025-09-10 02:35:05.338324', 'step': 1884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:05.391414', 'step': 1884, 'epoch': 1} {'type': 'loss', 'content': 0.14714425802230835, 'timestamp': '2025-09-10 02:35:05.393602', 'step': 1885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:05.447377', 'step': 1885, 'epoch': 1} {'type': 'loss', 'content': 0.16875053942203522, 'timestamp': '2025-09-10 02:35:05.449917', 'step': 1886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:05.503924', 'step': 1886, 'epoch': 1} {'type': 'loss', 'content': 0.14232237637043, 'timestamp': '2025-09-10 02:35:05.506101', 'step': 1887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:05.559414', 'step': 1887, 'epoch': 1} {'type': 'loss', 'content': 0.21685296297073364, 'timestamp': '2025-09-10 02:35:05.565576', 'step': 1888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:05.618423', 'step': 1888, 'epoch': 1} {'type': 'loss', 'content': 0.13665081560611725, 'timestamp': '2025-09-10 02:35:05.620637', 'step': 1889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:05.674439', 'step': 1889, 'epoch': 1} {'type': 'loss', 'content': 0.23674462735652924, 'timestamp': '2025-09-10 02:35:05.676767', 'step': 1890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:05.731501', 'step': 1890, 'epoch': 1} {'type': 'loss', 'content': 0.17429640889167786, 'timestamp': '2025-09-10 02:35:05.733800', 'step': 1891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:05.788092', 'step': 1891, 'epoch': 1} {'type': 'loss', 'content': 0.22337430715560913, 'timestamp': '2025-09-10 02:35:05.794565', 'step': 1892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:05.847547', 'step': 1892, 'epoch': 1} {'type': 'loss', 'content': 0.1266307234764099, 'timestamp': '2025-09-10 02:35:05.849858', 'step': 1893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:05.903156', 'step': 1893, 'epoch': 1} {'type': 'loss', 'content': 0.19940857589244843, 'timestamp': '2025-09-10 02:35:05.905404', 'step': 1894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:05.958470', 'step': 1894, 'epoch': 1} {'type': 'loss', 'content': 0.16526061296463013, 'timestamp': '2025-09-10 02:35:05.960838', 'step': 1895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:06.014303', 'step': 1895, 'epoch': 1} {'type': 'loss', 'content': 0.2211490273475647, 'timestamp': '2025-09-10 02:35:06.020459', 'step': 1896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:06.072848', 'step': 1896, 'epoch': 1} {'type': 'loss', 'content': 0.15892767906188965, 'timestamp': '2025-09-10 02:35:06.075084', 'step': 1897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:06.128650', 'step': 1897, 'epoch': 1} {'type': 'loss', 'content': 0.12489178776741028, 'timestamp': '2025-09-10 02:35:06.130992', 'step': 1898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:06.186863', 'step': 1898, 'epoch': 1} {'type': 'loss', 'content': 0.12959274649620056, 'timestamp': '2025-09-10 02:35:06.189200', 'step': 1899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:06.244833', 'step': 1899, 'epoch': 1} {'type': 'loss', 'content': 0.19536496698856354, 'timestamp': '2025-09-10 02:35:06.251372', 'step': 1900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:06.306945', 'step': 1900, 'epoch': 1} {'type': 'loss', 'content': 0.2098790556192398, 'timestamp': '2025-09-10 02:35:06.309343', 'step': 1901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:06.363310', 'step': 1901, 'epoch': 1} {'type': 'loss', 'content': 0.3007834851741791, 'timestamp': '2025-09-10 02:35:06.366751', 'step': 1902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:06.420945', 'step': 1902, 'epoch': 1} {'type': 'loss', 'content': 0.2450592815876007, 'timestamp': '2025-09-10 02:35:06.423230', 'step': 1903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:06.477156', 'step': 1903, 'epoch': 1} {'type': 'loss', 'content': 0.28241926431655884, 'timestamp': '2025-09-10 02:35:06.483337', 'step': 1904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:06.536769', 'step': 1904, 'epoch': 1} {'type': 'loss', 'content': 0.19021715223789215, 'timestamp': '2025-09-10 02:35:06.539018', 'step': 1905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:06.593935', 'step': 1905, 'epoch': 1} {'type': 'loss', 'content': 0.21348899602890015, 'timestamp': '2025-09-10 02:35:06.596302', 'step': 1906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:06.654171', 'step': 1906, 'epoch': 1} {'type': 'loss', 'content': 0.18457509577274323, 'timestamp': '2025-09-10 02:35:06.656476', 'step': 1907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:06.712939', 'step': 1907, 'epoch': 1} {'type': 'loss', 'content': 0.22564566135406494, 'timestamp': '2025-09-10 02:35:06.719389', 'step': 1908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:06.772727', 'step': 1908, 'epoch': 1} {'type': 'loss', 'content': 0.20313167572021484, 'timestamp': '2025-09-10 02:35:06.774825', 'step': 1909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:06.827892', 'step': 1909, 'epoch': 1} {'type': 'loss', 'content': 0.20762917399406433, 'timestamp': '2025-09-10 02:35:06.830067', 'step': 1910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:06.883622', 'step': 1910, 'epoch': 1} {'type': 'loss', 'content': 0.17150340974330902, 'timestamp': '2025-09-10 02:35:06.886074', 'step': 1911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:06.939477', 'step': 1911, 'epoch': 1} {'type': 'loss', 'content': 0.25358444452285767, 'timestamp': '2025-09-10 02:35:06.945691', 'step': 1912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:06.998464', 'step': 1912, 'epoch': 1} {'type': 'loss', 'content': 0.150652214884758, 'timestamp': '2025-09-10 02:35:07.000946', 'step': 1913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:07.055611', 'step': 1913, 'epoch': 1} {'type': 'loss', 'content': 0.21579258143901825, 'timestamp': '2025-09-10 02:35:07.057875', 'step': 1914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:07.112095', 'step': 1914, 'epoch': 1} {'type': 'loss', 'content': 0.26045307517051697, 'timestamp': '2025-09-10 02:35:07.114538', 'step': 1915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:07.168160', 'step': 1915, 'epoch': 1} {'type': 'loss', 'content': 0.15168796479701996, 'timestamp': '2025-09-10 02:35:07.174523', 'step': 1916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:07.231322', 'step': 1916, 'epoch': 1} {'type': 'loss', 'content': 0.15088209509849548, 'timestamp': '2025-09-10 02:35:07.233648', 'step': 1917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:07.288815', 'step': 1917, 'epoch': 1} {'type': 'loss', 'content': 0.19608445465564728, 'timestamp': '2025-09-10 02:35:07.291333', 'step': 1918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:07.346162', 'step': 1918, 'epoch': 1} {'type': 'loss', 'content': 0.15028592944145203, 'timestamp': '2025-09-10 02:35:07.348483', 'step': 1919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:07.405786', 'step': 1919, 'epoch': 1} {'type': 'loss', 'content': 0.19906528294086456, 'timestamp': '2025-09-10 02:35:07.412151', 'step': 1920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:07.465778', 'step': 1920, 'epoch': 1} {'type': 'loss', 'content': 0.24367690086364746, 'timestamp': '2025-09-10 02:35:07.468011', 'step': 1921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:07.522161', 'step': 1921, 'epoch': 1} {'type': 'loss', 'content': 0.14070945978164673, 'timestamp': '2025-09-10 02:35:07.524413', 'step': 1922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:07.578217', 'step': 1922, 'epoch': 1} {'type': 'loss', 'content': 0.11095008999109268, 'timestamp': '2025-09-10 02:35:07.580490', 'step': 1923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:07.633698', 'step': 1923, 'epoch': 1} {'type': 'loss', 'content': 0.22551259398460388, 'timestamp': '2025-09-10 02:35:07.640074', 'step': 1924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:07.693894', 'step': 1924, 'epoch': 1} {'type': 'loss', 'content': 0.21697062253952026, 'timestamp': '2025-09-10 02:35:07.696187', 'step': 1925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:07.750854', 'step': 1925, 'epoch': 1} {'type': 'loss', 'content': 0.2390318512916565, 'timestamp': '2025-09-10 02:35:07.753140', 'step': 1926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:07.807270', 'step': 1926, 'epoch': 1} {'type': 'loss', 'content': 0.12539473176002502, 'timestamp': '2025-09-10 02:35:07.809561', 'step': 1927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:07.863421', 'step': 1927, 'epoch': 1} {'type': 'loss', 'content': 0.23779930174350739, 'timestamp': '2025-09-10 02:35:07.869742', 'step': 1928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:07.923647', 'step': 1928, 'epoch': 1} {'type': 'loss', 'content': 0.22020936012268066, 'timestamp': '2025-09-10 02:35:07.925868', 'step': 1929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:07.980704', 'step': 1929, 'epoch': 1} {'type': 'loss', 'content': 0.18752609193325043, 'timestamp': '2025-09-10 02:35:07.983165', 'step': 1930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:08.039747', 'step': 1930, 'epoch': 1} {'type': 'loss', 'content': 0.2030019611120224, 'timestamp': '2025-09-10 02:35:08.042351', 'step': 1931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:08.096819', 'step': 1931, 'epoch': 1} {'type': 'loss', 'content': 0.12549547851085663, 'timestamp': '2025-09-10 02:35:08.103229', 'step': 1932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:08.157115', 'step': 1932, 'epoch': 1} {'type': 'loss', 'content': 0.22294728457927704, 'timestamp': '2025-09-10 02:35:08.159416', 'step': 1933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:08.213817', 'step': 1933, 'epoch': 1} {'type': 'loss', 'content': 0.18147101998329163, 'timestamp': '2025-09-10 02:35:08.216154', 'step': 1934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:08.270863', 'step': 1934, 'epoch': 1} {'type': 'loss', 'content': 0.12286896258592606, 'timestamp': '2025-09-10 02:35:08.273201', 'step': 1935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:08.327675', 'step': 1935, 'epoch': 1} {'type': 'loss', 'content': 0.2498874068260193, 'timestamp': '2025-09-10 02:35:08.334142', 'step': 1936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:08.386973', 'step': 1936, 'epoch': 1} {'type': 'loss', 'content': 0.15691617131233215, 'timestamp': '2025-09-10 02:35:08.389137', 'step': 1937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:08.442081', 'step': 1937, 'epoch': 1} {'type': 'loss', 'content': 0.26188498735427856, 'timestamp': '2025-09-10 02:35:08.444285', 'step': 1938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:08.497444', 'step': 1938, 'epoch': 1} {'type': 'loss', 'content': 0.1887887567281723, 'timestamp': '2025-09-10 02:35:08.499627', 'step': 1939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:08.552675', 'step': 1939, 'epoch': 1} {'type': 'loss', 'content': 0.16775618493556976, 'timestamp': '2025-09-10 02:35:08.558774', 'step': 1940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:08.610811', 'step': 1940, 'epoch': 1} {'type': 'loss', 'content': 0.26723402738571167, 'timestamp': '2025-09-10 02:35:08.612953', 'step': 1941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:08.666343', 'step': 1941, 'epoch': 1} {'type': 'loss', 'content': 0.1971723735332489, 'timestamp': '2025-09-10 02:35:08.668653', 'step': 1942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:08.722021', 'step': 1942, 'epoch': 1} {'type': 'loss', 'content': 0.16550488770008087, 'timestamp': '2025-09-10 02:35:08.724203', 'step': 1943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:08.777397', 'step': 1943, 'epoch': 1} {'type': 'loss', 'content': 0.23049399256706238, 'timestamp': '2025-09-10 02:35:08.783915', 'step': 1944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:08.836699', 'step': 1944, 'epoch': 1} {'type': 'loss', 'content': 0.2869133949279785, 'timestamp': '2025-09-10 02:35:08.838994', 'step': 1945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:08.892625', 'step': 1945, 'epoch': 1} {'type': 'loss', 'content': 0.14833539724349976, 'timestamp': '2025-09-10 02:35:08.895197', 'step': 1946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:08.948535', 'step': 1946, 'epoch': 1} {'type': 'loss', 'content': 0.18811383843421936, 'timestamp': '2025-09-10 02:35:08.950724', 'step': 1947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:09.003661', 'step': 1947, 'epoch': 1} {'type': 'loss', 'content': 0.18923895061016083, 'timestamp': '2025-09-10 02:35:09.009668', 'step': 1948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:09.061426', 'step': 1948, 'epoch': 1} {'type': 'loss', 'content': 0.31090033054351807, 'timestamp': '2025-09-10 02:35:09.063967', 'step': 1949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:09.116212', 'step': 1949, 'epoch': 1} {'type': 'loss', 'content': 0.17573976516723633, 'timestamp': '2025-09-10 02:35:09.118481', 'step': 1950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:09.171106', 'step': 1950, 'epoch': 1} {'type': 'loss', 'content': 0.29259204864501953, 'timestamp': '2025-09-10 02:35:09.173351', 'step': 1951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:09.226326', 'step': 1951, 'epoch': 1} {'type': 'loss', 'content': 0.12795542180538177, 'timestamp': '2025-09-10 02:35:09.232392', 'step': 1952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:09.285604', 'step': 1952, 'epoch': 1} {'type': 'loss', 'content': 0.1674111783504486, 'timestamp': '2025-09-10 02:35:09.287755', 'step': 1953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:09.341695', 'step': 1953, 'epoch': 1} {'type': 'loss', 'content': 0.10660502314567566, 'timestamp': '2025-09-10 02:35:09.343932', 'step': 1954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:09.397627', 'step': 1954, 'epoch': 1} {'type': 'loss', 'content': 0.13419213891029358, 'timestamp': '2025-09-10 02:35:09.399790', 'step': 1955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:09.454571', 'step': 1955, 'epoch': 1} {'type': 'loss', 'content': 0.1562563180923462, 'timestamp': '2025-09-10 02:35:09.460726', 'step': 1956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:09.513465', 'step': 1956, 'epoch': 1} {'type': 'loss', 'content': 0.20958757400512695, 'timestamp': '2025-09-10 02:35:09.515729', 'step': 1957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:09.568409', 'step': 1957, 'epoch': 1} {'type': 'loss', 'content': 0.19542868435382843, 'timestamp': '2025-09-10 02:35:09.570733', 'step': 1958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:09.629639', 'step': 1958, 'epoch': 1} {'type': 'loss', 'content': 0.1842709332704544, 'timestamp': '2025-09-10 02:35:09.631964', 'step': 1959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:09.685462', 'step': 1959, 'epoch': 1} {'type': 'loss', 'content': 0.20228196680545807, 'timestamp': '2025-09-10 02:35:09.691726', 'step': 1960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:09.745398', 'step': 1960, 'epoch': 1} {'type': 'loss', 'content': 0.13581699132919312, 'timestamp': '2025-09-10 02:35:09.747368', 'step': 1961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:09.800251', 'step': 1961, 'epoch': 1} {'type': 'loss', 'content': 0.16232429444789886, 'timestamp': '2025-09-10 02:35:09.802251', 'step': 1962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:09.854810', 'step': 1962, 'epoch': 1} {'type': 'loss', 'content': 0.14239874482154846, 'timestamp': '2025-09-10 02:35:09.856939', 'step': 1963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:09.910227', 'step': 1963, 'epoch': 1} {'type': 'loss', 'content': 0.12348222732543945, 'timestamp': '2025-09-10 02:35:09.916278', 'step': 1964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:09.967963', 'step': 1964, 'epoch': 1} {'type': 'loss', 'content': 0.15576733648777008, 'timestamp': '2025-09-10 02:35:09.969947', 'step': 1965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:10.022170', 'step': 1965, 'epoch': 1} {'type': 'loss', 'content': 0.12531337141990662, 'timestamp': '2025-09-10 02:35:10.024197', 'step': 1966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:10.076617', 'step': 1966, 'epoch': 1} {'type': 'loss', 'content': 0.3575856387615204, 'timestamp': '2025-09-10 02:35:10.078766', 'step': 1967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:10.131227', 'step': 1967, 'epoch': 1} {'type': 'loss', 'content': 0.15336591005325317, 'timestamp': '2025-09-10 02:35:10.137142', 'step': 1968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:10.189120', 'step': 1968, 'epoch': 1} {'type': 'loss', 'content': 0.17050090432167053, 'timestamp': '2025-09-10 02:35:10.191075', 'step': 1969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:10.243259', 'step': 1969, 'epoch': 1} {'type': 'loss', 'content': 0.15135133266448975, 'timestamp': '2025-09-10 02:35:10.245370', 'step': 1970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:10.297926', 'step': 1970, 'epoch': 1} {'type': 'loss', 'content': 0.2485920637845993, 'timestamp': '2025-09-10 02:35:10.300057', 'step': 1971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:10.353744', 'step': 1971, 'epoch': 1} {'type': 'loss', 'content': 0.11353757977485657, 'timestamp': '2025-09-10 02:35:10.359861', 'step': 1972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:10.415089', 'step': 1972, 'epoch': 1} {'type': 'loss', 'content': 0.178374782204628, 'timestamp': '2025-09-10 02:35:10.417485', 'step': 1973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:10.471946', 'step': 1973, 'epoch': 1} {'type': 'loss', 'content': 0.2171809822320938, 'timestamp': '2025-09-10 02:35:10.474270', 'step': 1974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:10.528192', 'step': 1974, 'epoch': 1} {'type': 'loss', 'content': 0.15098239481449127, 'timestamp': '2025-09-10 02:35:10.530455', 'step': 1975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:10.584092', 'step': 1975, 'epoch': 1} {'type': 'loss', 'content': 0.15096551179885864, 'timestamp': '2025-09-10 02:35:10.590294', 'step': 1976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:10.643371', 'step': 1976, 'epoch': 1} {'type': 'loss', 'content': 0.1778492033481598, 'timestamp': '2025-09-10 02:35:10.645301', 'step': 1977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:10.700262', 'step': 1977, 'epoch': 1} {'type': 'loss', 'content': 0.1677243709564209, 'timestamp': '2025-09-10 02:35:10.702301', 'step': 1978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:10.755699', 'step': 1978, 'epoch': 1} {'type': 'loss', 'content': 0.24871279299259186, 'timestamp': '2025-09-10 02:35:10.757643', 'step': 1979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:10.813393', 'step': 1979, 'epoch': 1} {'type': 'loss', 'content': 0.148431196808815, 'timestamp': '2025-09-10 02:35:10.819604', 'step': 1980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:10.873422', 'step': 1980, 'epoch': 1} {'type': 'loss', 'content': 0.1832447499036789, 'timestamp': '2025-09-10 02:35:10.875374', 'step': 1981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:10.928507', 'step': 1981, 'epoch': 1} {'type': 'loss', 'content': 0.2673993706703186, 'timestamp': '2025-09-10 02:35:10.930469', 'step': 1982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:10.985062', 'step': 1982, 'epoch': 1} {'type': 'loss', 'content': 0.23114149272441864, 'timestamp': '2025-09-10 02:35:10.987053', 'step': 1983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:11.039662', 'step': 1983, 'epoch': 1} {'type': 'loss', 'content': 0.24833548069000244, 'timestamp': '2025-09-10 02:35:11.045524', 'step': 1984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:11.098081', 'step': 1984, 'epoch': 1} {'type': 'loss', 'content': 0.16219665110111237, 'timestamp': '2025-09-10 02:35:11.100153', 'step': 1985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:11.152616', 'step': 1985, 'epoch': 1} {'type': 'loss', 'content': 0.1407274305820465, 'timestamp': '2025-09-10 02:35:11.154611', 'step': 1986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:11.206781', 'step': 1986, 'epoch': 1} {'type': 'loss', 'content': 0.24266451597213745, 'timestamp': '2025-09-10 02:35:11.208762', 'step': 1987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:11.262579', 'step': 1987, 'epoch': 1} {'type': 'loss', 'content': 0.2329484075307846, 'timestamp': '2025-09-10 02:35:11.268662', 'step': 1988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:11.322112', 'step': 1988, 'epoch': 1} {'type': 'loss', 'content': 0.2890438139438629, 'timestamp': '2025-09-10 02:35:11.324325', 'step': 1989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:11.377309', 'step': 1989, 'epoch': 1} {'type': 'loss', 'content': 0.14409273862838745, 'timestamp': '2025-09-10 02:35:11.379233', 'step': 1990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:11.432371', 'step': 1990, 'epoch': 1} {'type': 'loss', 'content': 0.16234268248081207, 'timestamp': '2025-09-10 02:35:11.434381', 'step': 1991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:11.487308', 'step': 1991, 'epoch': 1} {'type': 'loss', 'content': 0.1599833071231842, 'timestamp': '2025-09-10 02:35:11.493396', 'step': 1992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:11.546285', 'step': 1992, 'epoch': 1} {'type': 'loss', 'content': 0.1668364405632019, 'timestamp': '2025-09-10 02:35:11.548291', 'step': 1993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:11.601204', 'step': 1993, 'epoch': 1} {'type': 'loss', 'content': 0.19798116385936737, 'timestamp': '2025-09-10 02:35:11.603363', 'step': 1994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:11.656719', 'step': 1994, 'epoch': 1} {'type': 'loss', 'content': 0.23258943855762482, 'timestamp': '2025-09-10 02:35:11.658687', 'step': 1995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:11.711089', 'step': 1995, 'epoch': 1} {'type': 'loss', 'content': 0.18311980366706848, 'timestamp': '2025-09-10 02:35:11.717092', 'step': 1996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:11.768848', 'step': 1996, 'epoch': 1} {'type': 'loss', 'content': 0.2122812718153, 'timestamp': '2025-09-10 02:35:11.770797', 'step': 1997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:11.823120', 'step': 1997, 'epoch': 1} {'type': 'loss', 'content': 0.25637152791023254, 'timestamp': '2025-09-10 02:35:11.825133', 'step': 1998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:11.879160', 'step': 1998, 'epoch': 1} {'type': 'loss', 'content': 0.1641395092010498, 'timestamp': '2025-09-10 02:35:11.881225', 'step': 1999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:11.938306', 'step': 1999, 'epoch': 1} {'type': 'loss', 'content': 0.1735258847475052, 'timestamp': '2025-09-10 02:35:11.944424', 'step': 2000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 2000', 'timestamp': '2025-09-10 02:35:12.463463', 'step': 2000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:12.519990', 'step': 2000, 'epoch': 1} {'type': 'loss', 'content': 0.14202192425727844, 'timestamp': '2025-09-10 02:35:12.522069', 'step': 2001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:12.578364', 'step': 2001, 'epoch': 1} {'type': 'loss', 'content': 0.22379393875598907, 'timestamp': '2025-09-10 02:35:12.580459', 'step': 2002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:12.636219', 'step': 2002, 'epoch': 1} {'type': 'loss', 'content': 0.18472863733768463, 'timestamp': '2025-09-10 02:35:12.638497', 'step': 2003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:12.697701', 'step': 2003, 'epoch': 1} {'type': 'loss', 'content': 0.23045116662979126, 'timestamp': '2025-09-10 02:35:12.704141', 'step': 2004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:12.758299', 'step': 2004, 'epoch': 1} {'type': 'loss', 'content': 0.13519106805324554, 'timestamp': '2025-09-10 02:35:12.760298', 'step': 2005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:12.818004', 'step': 2005, 'epoch': 1} {'type': 'loss', 'content': 0.16096045076847076, 'timestamp': '2025-09-10 02:35:12.820677', 'step': 2006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:12.873606', 'step': 2006, 'epoch': 1} {'type': 'loss', 'content': 0.2172701209783554, 'timestamp': '2025-09-10 02:35:12.875642', 'step': 2007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:12.929956', 'step': 2007, 'epoch': 1} {'type': 'loss', 'content': 0.18184131383895874, 'timestamp': '2025-09-10 02:35:12.935828', 'step': 2008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:12.988064', 'step': 2008, 'epoch': 1} {'type': 'loss', 'content': 0.1467524766921997, 'timestamp': '2025-09-10 02:35:12.989885', 'step': 2009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:13.043862', 'step': 2009, 'epoch': 1} {'type': 'loss', 'content': 0.19960087537765503, 'timestamp': '2025-09-10 02:35:13.045882', 'step': 2010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:13.100595', 'step': 2010, 'epoch': 1} {'type': 'loss', 'content': 0.10841165482997894, 'timestamp': '2025-09-10 02:35:13.102601', 'step': 2011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:13.155664', 'step': 2011, 'epoch': 1} {'type': 'loss', 'content': 0.15817497670650482, 'timestamp': '2025-09-10 02:35:13.161492', 'step': 2012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:13.214519', 'step': 2012, 'epoch': 1} {'type': 'loss', 'content': 0.27317574620246887, 'timestamp': '2025-09-10 02:35:13.217485', 'step': 2013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:13.272428', 'step': 2013, 'epoch': 1} {'type': 'loss', 'content': 0.24037013947963715, 'timestamp': '2025-09-10 02:35:13.274424', 'step': 2014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:13.328157', 'step': 2014, 'epoch': 1} {'type': 'loss', 'content': 0.14083269238471985, 'timestamp': '2025-09-10 02:35:13.329890', 'step': 2015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:13.385503', 'step': 2015, 'epoch': 1} {'type': 'loss', 'content': 0.12173815071582794, 'timestamp': '2025-09-10 02:35:13.391396', 'step': 2016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:13.443924', 'step': 2016, 'epoch': 1} {'type': 'loss', 'content': 0.11282685399055481, 'timestamp': '2025-09-10 02:35:13.445967', 'step': 2017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:13.497730', 'step': 2017, 'epoch': 1} {'type': 'loss', 'content': 0.211013063788414, 'timestamp': '2025-09-10 02:35:13.499770', 'step': 2018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:13.552491', 'step': 2018, 'epoch': 1} {'type': 'loss', 'content': 0.1779991239309311, 'timestamp': '2025-09-10 02:35:13.554576', 'step': 2019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:13.607887', 'step': 2019, 'epoch': 1} {'type': 'loss', 'content': 0.273766428232193, 'timestamp': '2025-09-10 02:35:13.613802', 'step': 2020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:13.665937', 'step': 2020, 'epoch': 1} {'type': 'loss', 'content': 0.17650392651557922, 'timestamp': '2025-09-10 02:35:13.667960', 'step': 2021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:13.722157', 'step': 2021, 'epoch': 1} {'type': 'loss', 'content': 0.15862752497196198, 'timestamp': '2025-09-10 02:35:13.724444', 'step': 2022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:13.777759', 'step': 2022, 'epoch': 1} {'type': 'loss', 'content': 0.2344757467508316, 'timestamp': '2025-09-10 02:35:13.779498', 'step': 2023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:13.834328', 'step': 2023, 'epoch': 1} {'type': 'loss', 'content': 0.1593317687511444, 'timestamp': '2025-09-10 02:35:13.840163', 'step': 2024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:13.892179', 'step': 2024, 'epoch': 1} {'type': 'loss', 'content': 0.23151403665542603, 'timestamp': '2025-09-10 02:35:13.893954', 'step': 2025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:13.945861', 'step': 2025, 'epoch': 1} {'type': 'loss', 'content': 0.22347941994667053, 'timestamp': '2025-09-10 02:35:13.947605', 'step': 2026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:14.000666', 'step': 2026, 'epoch': 1} {'type': 'loss', 'content': 0.18587447702884674, 'timestamp': '2025-09-10 02:35:14.002916', 'step': 2027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:14.056017', 'step': 2027, 'epoch': 1} {'type': 'loss', 'content': 0.13657571375370026, 'timestamp': '2025-09-10 02:35:14.061805', 'step': 2028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:14.115298', 'step': 2028, 'epoch': 1} {'type': 'loss', 'content': 0.16178405284881592, 'timestamp': '2025-09-10 02:35:14.117261', 'step': 2029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:14.171209', 'step': 2029, 'epoch': 1} {'type': 'loss', 'content': 0.1707276552915573, 'timestamp': '2025-09-10 02:35:14.173097', 'step': 2030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:14.226544', 'step': 2030, 'epoch': 1} {'type': 'loss', 'content': 0.11978346109390259, 'timestamp': '2025-09-10 02:35:14.229022', 'step': 2031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:14.281908', 'step': 2031, 'epoch': 1} {'type': 'loss', 'content': 0.14823400974273682, 'timestamp': '2025-09-10 02:35:14.288064', 'step': 2032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:14.340923', 'step': 2032, 'epoch': 1} {'type': 'loss', 'content': 0.2830437421798706, 'timestamp': '2025-09-10 02:35:14.343081', 'step': 2033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:14.396608', 'step': 2033, 'epoch': 1} {'type': 'loss', 'content': 0.1581367552280426, 'timestamp': '2025-09-10 02:35:14.398785', 'step': 2034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:14.452531', 'step': 2034, 'epoch': 1} {'type': 'loss', 'content': 0.09824394434690475, 'timestamp': '2025-09-10 02:35:14.454637', 'step': 2035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:14.509145', 'step': 2035, 'epoch': 1} {'type': 'loss', 'content': 0.23045532405376434, 'timestamp': '2025-09-10 02:35:14.515641', 'step': 2036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:14.569834', 'step': 2036, 'epoch': 1} {'type': 'loss', 'content': 0.16341084241867065, 'timestamp': '2025-09-10 02:35:14.572330', 'step': 2037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:14.625932', 'step': 2037, 'epoch': 1} {'type': 'loss', 'content': 0.27162566781044006, 'timestamp': '2025-09-10 02:35:14.630134', 'step': 2038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:14.683492', 'step': 2038, 'epoch': 1} {'type': 'loss', 'content': 0.21892237663269043, 'timestamp': '2025-09-10 02:35:14.685671', 'step': 2039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:14.739401', 'step': 2039, 'epoch': 1} {'type': 'loss', 'content': 0.13452842831611633, 'timestamp': '2025-09-10 02:35:14.745738', 'step': 2040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:14.798282', 'step': 2040, 'epoch': 1} {'type': 'loss', 'content': 0.17630966007709503, 'timestamp': '2025-09-10 02:35:14.800445', 'step': 2041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:14.853494', 'step': 2041, 'epoch': 1} {'type': 'loss', 'content': 0.14219066500663757, 'timestamp': '2025-09-10 02:35:14.855617', 'step': 2042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:14.909583', 'step': 2042, 'epoch': 1} {'type': 'loss', 'content': 0.2270304262638092, 'timestamp': '2025-09-10 02:35:14.911667', 'step': 2043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:14.964662', 'step': 2043, 'epoch': 1} {'type': 'loss', 'content': 0.11385694146156311, 'timestamp': '2025-09-10 02:35:14.970620', 'step': 2044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:15.024826', 'step': 2044, 'epoch': 1} {'type': 'loss', 'content': 0.1960696578025818, 'timestamp': '2025-09-10 02:35:15.027050', 'step': 2045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:15.079620', 'step': 2045, 'epoch': 1} {'type': 'loss', 'content': 0.0982724130153656, 'timestamp': '2025-09-10 02:35:15.081925', 'step': 2046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:15.134579', 'step': 2046, 'epoch': 1} {'type': 'loss', 'content': 0.16670894622802734, 'timestamp': '2025-09-10 02:35:15.136748', 'step': 2047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:15.190410', 'step': 2047, 'epoch': 1} {'type': 'loss', 'content': 0.22885611653327942, 'timestamp': '2025-09-10 02:35:15.196371', 'step': 2048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:15.249063', 'step': 2048, 'epoch': 1} {'type': 'loss', 'content': 0.12352202832698822, 'timestamp': '2025-09-10 02:35:15.251177', 'step': 2049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:15.306225', 'step': 2049, 'epoch': 1} {'type': 'loss', 'content': 0.17142941057682037, 'timestamp': '2025-09-10 02:35:15.308425', 'step': 2050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:15.362194', 'step': 2050, 'epoch': 1} {'type': 'loss', 'content': 0.2131635844707489, 'timestamp': '2025-09-10 02:35:15.364300', 'step': 2051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:15.416905', 'step': 2051, 'epoch': 1} {'type': 'loss', 'content': 0.17760305106639862, 'timestamp': '2025-09-10 02:35:15.423549', 'step': 2052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:15.476449', 'step': 2052, 'epoch': 1} {'type': 'loss', 'content': 0.2291332334280014, 'timestamp': '2025-09-10 02:35:15.478414', 'step': 2053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:15.532238', 'step': 2053, 'epoch': 1} {'type': 'loss', 'content': 0.16160494089126587, 'timestamp': '2025-09-10 02:35:15.534233', 'step': 2054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:15.586361', 'step': 2054, 'epoch': 1} {'type': 'loss', 'content': 0.12043008208274841, 'timestamp': '2025-09-10 02:35:15.588566', 'step': 2055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:15.646813', 'step': 2055, 'epoch': 1} {'type': 'loss', 'content': 0.12558045983314514, 'timestamp': '2025-09-10 02:35:15.652567', 'step': 2056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:15.704448', 'step': 2056, 'epoch': 1} {'type': 'loss', 'content': 0.1284043788909912, 'timestamp': '2025-09-10 02:35:15.706644', 'step': 2057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:15.760118', 'step': 2057, 'epoch': 1} {'type': 'loss', 'content': 0.2224709540605545, 'timestamp': '2025-09-10 02:35:15.762427', 'step': 2058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:15.815980', 'step': 2058, 'epoch': 1} {'type': 'loss', 'content': 0.12570761144161224, 'timestamp': '2025-09-10 02:35:15.818474', 'step': 2059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:15.872106', 'step': 2059, 'epoch': 1} {'type': 'loss', 'content': 0.13119526207447052, 'timestamp': '2025-09-10 02:35:15.878385', 'step': 2060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:15.931556', 'step': 2060, 'epoch': 1} {'type': 'loss', 'content': 0.13564540445804596, 'timestamp': '2025-09-10 02:35:15.933698', 'step': 2061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:15.986233', 'step': 2061, 'epoch': 1} {'type': 'loss', 'content': 0.13957712054252625, 'timestamp': '2025-09-10 02:35:15.989522', 'step': 2062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:16.044313', 'step': 2062, 'epoch': 1} {'type': 'loss', 'content': 0.20224878191947937, 'timestamp': '2025-09-10 02:35:16.046758', 'step': 2063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:16.100327', 'step': 2063, 'epoch': 1} {'type': 'loss', 'content': 0.23890748620033264, 'timestamp': '2025-09-10 02:35:16.106238', 'step': 2064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:16.160080', 'step': 2064, 'epoch': 1} {'type': 'loss', 'content': 0.1739695519208908, 'timestamp': '2025-09-10 02:35:16.162354', 'step': 2065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:16.215125', 'step': 2065, 'epoch': 1} {'type': 'loss', 'content': 0.1494462639093399, 'timestamp': '2025-09-10 02:35:16.217258', 'step': 2066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:16.270158', 'step': 2066, 'epoch': 1} {'type': 'loss', 'content': 0.17824281752109528, 'timestamp': '2025-09-10 02:35:16.272522', 'step': 2067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:16.326532', 'step': 2067, 'epoch': 1} {'type': 'loss', 'content': 0.14901494979858398, 'timestamp': '2025-09-10 02:35:16.332854', 'step': 2068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:16.385387', 'step': 2068, 'epoch': 1} {'type': 'loss', 'content': 0.2361663430929184, 'timestamp': '2025-09-10 02:35:16.387709', 'step': 2069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:16.441631', 'step': 2069, 'epoch': 1} {'type': 'loss', 'content': 0.16563226282596588, 'timestamp': '2025-09-10 02:35:16.443974', 'step': 2070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:16.497359', 'step': 2070, 'epoch': 1} {'type': 'loss', 'content': 0.22783367335796356, 'timestamp': '2025-09-10 02:35:16.499673', 'step': 2071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:16.552885', 'step': 2071, 'epoch': 1} {'type': 'loss', 'content': 0.20747266709804535, 'timestamp': '2025-09-10 02:35:16.558949', 'step': 2072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:16.611403', 'step': 2072, 'epoch': 1} {'type': 'loss', 'content': 0.1380009949207306, 'timestamp': '2025-09-10 02:35:16.613698', 'step': 2073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:16.667678', 'step': 2073, 'epoch': 1} {'type': 'loss', 'content': 0.1644328385591507, 'timestamp': '2025-09-10 02:35:16.670110', 'step': 2074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:16.723407', 'step': 2074, 'epoch': 1} {'type': 'loss', 'content': 0.12429254502058029, 'timestamp': '2025-09-10 02:35:16.725998', 'step': 2075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:16.779286', 'step': 2075, 'epoch': 1} {'type': 'loss', 'content': 0.12970732152462006, 'timestamp': '2025-09-10 02:35:16.786534', 'step': 2076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:16.841982', 'step': 2076, 'epoch': 1} {'type': 'loss', 'content': 0.2985478639602661, 'timestamp': '2025-09-10 02:35:16.844368', 'step': 2077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:16.905063', 'step': 2077, 'epoch': 1} {'type': 'loss', 'content': 0.21745549142360687, 'timestamp': '2025-09-10 02:35:16.908414', 'step': 2078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:16.968693', 'step': 2078, 'epoch': 1} {'type': 'loss', 'content': 0.12404198199510574, 'timestamp': '2025-09-10 02:35:16.970961', 'step': 2079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:17.024124', 'step': 2079, 'epoch': 1} {'type': 'loss', 'content': 0.19484034180641174, 'timestamp': '2025-09-10 02:35:17.030280', 'step': 2080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:17.082413', 'step': 2080, 'epoch': 1} {'type': 'loss', 'content': 0.18387162685394287, 'timestamp': '2025-09-10 02:35:17.084442', 'step': 2081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:17.137079', 'step': 2081, 'epoch': 1} {'type': 'loss', 'content': 0.13130266964435577, 'timestamp': '2025-09-10 02:35:17.139293', 'step': 2082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:17.192216', 'step': 2082, 'epoch': 1} {'type': 'loss', 'content': 0.14880312979221344, 'timestamp': '2025-09-10 02:35:17.194359', 'step': 2083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:17.247138', 'step': 2083, 'epoch': 1} {'type': 'loss', 'content': 0.1382341831922531, 'timestamp': '2025-09-10 02:35:17.253248', 'step': 2084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:17.305740', 'step': 2084, 'epoch': 1} {'type': 'loss', 'content': 0.13980922102928162, 'timestamp': '2025-09-10 02:35:17.308187', 'step': 2085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:17.364016', 'step': 2085, 'epoch': 1} {'type': 'loss', 'content': 0.14491622149944305, 'timestamp': '2025-09-10 02:35:17.366408', 'step': 2086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:17.419571', 'step': 2086, 'epoch': 1} {'type': 'loss', 'content': 0.2326991707086563, 'timestamp': '2025-09-10 02:35:17.421759', 'step': 2087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:17.474779', 'step': 2087, 'epoch': 1} {'type': 'loss', 'content': 0.12459930032491684, 'timestamp': '2025-09-10 02:35:17.480854', 'step': 2088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:17.533491', 'step': 2088, 'epoch': 1} {'type': 'loss', 'content': 0.1757207065820694, 'timestamp': '2025-09-10 02:35:17.535690', 'step': 2089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:17.588534', 'step': 2089, 'epoch': 1} {'type': 'loss', 'content': 0.1324489414691925, 'timestamp': '2025-09-10 02:35:17.592594', 'step': 2090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:17.647809', 'step': 2090, 'epoch': 1} {'type': 'loss', 'content': 0.21580374240875244, 'timestamp': '2025-09-10 02:35:17.649991', 'step': 2091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:17.703716', 'step': 2091, 'epoch': 1} {'type': 'loss', 'content': 0.2198098599910736, 'timestamp': '2025-09-10 02:35:17.709614', 'step': 2092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:17.763799', 'step': 2092, 'epoch': 1} {'type': 'loss', 'content': 0.2821918725967407, 'timestamp': '2025-09-10 02:35:17.766031', 'step': 2093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:17.821978', 'step': 2093, 'epoch': 1} {'type': 'loss', 'content': 0.17813679575920105, 'timestamp': '2025-09-10 02:35:17.824224', 'step': 2094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:17.877303', 'step': 2094, 'epoch': 1} {'type': 'loss', 'content': 0.14063052833080292, 'timestamp': '2025-09-10 02:35:17.879442', 'step': 2095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:17.933021', 'step': 2095, 'epoch': 1} {'type': 'loss', 'content': 0.11202948540449142, 'timestamp': '2025-09-10 02:35:17.939152', 'step': 2096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:17.993913', 'step': 2096, 'epoch': 1} {'type': 'loss', 'content': 0.1797322928905487, 'timestamp': '2025-09-10 02:35:17.998295', 'step': 2097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:18.057017', 'step': 2097, 'epoch': 1} {'type': 'loss', 'content': 0.2675084173679352, 'timestamp': '2025-09-10 02:35:18.059064', 'step': 2098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:18.112339', 'step': 2098, 'epoch': 1} {'type': 'loss', 'content': 0.18634898960590363, 'timestamp': '2025-09-10 02:35:18.114645', 'step': 2099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:18.168972', 'step': 2099, 'epoch': 1} {'type': 'loss', 'content': 0.1530231237411499, 'timestamp': '2025-09-10 02:35:18.177845', 'step': 2100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:18.234634', 'step': 2100, 'epoch': 1} {'type': 'loss', 'content': 0.17343515157699585, 'timestamp': '2025-09-10 02:35:18.236728', 'step': 2101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:18.289281', 'step': 2101, 'epoch': 1} {'type': 'loss', 'content': 0.23995943367481232, 'timestamp': '2025-09-10 02:35:18.291478', 'step': 2102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:18.344747', 'step': 2102, 'epoch': 1} {'type': 'loss', 'content': 0.18358632922172546, 'timestamp': '2025-09-10 02:35:18.347257', 'step': 2103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:18.401271', 'step': 2103, 'epoch': 1} {'type': 'loss', 'content': 0.17165105044841766, 'timestamp': '2025-09-10 02:35:18.407393', 'step': 2104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:18.461054', 'step': 2104, 'epoch': 1} {'type': 'loss', 'content': 0.15284015238285065, 'timestamp': '2025-09-10 02:35:18.463286', 'step': 2105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:18.516449', 'step': 2105, 'epoch': 1} {'type': 'loss', 'content': 0.19866934418678284, 'timestamp': '2025-09-10 02:35:18.518666', 'step': 2106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:18.572711', 'step': 2106, 'epoch': 1} {'type': 'loss', 'content': 0.20223164558410645, 'timestamp': '2025-09-10 02:35:18.574987', 'step': 2107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:18.627840', 'step': 2107, 'epoch': 1} {'type': 'loss', 'content': 0.19671426713466644, 'timestamp': '2025-09-10 02:35:18.633755', 'step': 2108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:18.685620', 'step': 2108, 'epoch': 1} {'type': 'loss', 'content': 0.198182612657547, 'timestamp': '2025-09-10 02:35:18.687804', 'step': 2109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:18.740848', 'step': 2109, 'epoch': 1} {'type': 'loss', 'content': 0.11685346066951752, 'timestamp': '2025-09-10 02:35:18.743058', 'step': 2110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:18.796020', 'step': 2110, 'epoch': 1} {'type': 'loss', 'content': 0.23880994319915771, 'timestamp': '2025-09-10 02:35:18.798310', 'step': 2111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:18.850991', 'step': 2111, 'epoch': 1} {'type': 'loss', 'content': 0.16804105043411255, 'timestamp': '2025-09-10 02:35:18.857035', 'step': 2112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:18.909743', 'step': 2112, 'epoch': 1} {'type': 'loss', 'content': 0.14345566928386688, 'timestamp': '2025-09-10 02:35:18.912092', 'step': 2113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:18.965244', 'step': 2113, 'epoch': 1} {'type': 'loss', 'content': 0.15855281054973602, 'timestamp': '2025-09-10 02:35:18.967492', 'step': 2114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:19.021513', 'step': 2114, 'epoch': 1} {'type': 'loss', 'content': 0.3097461760044098, 'timestamp': '2025-09-10 02:35:19.023736', 'step': 2115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:19.077645', 'step': 2115, 'epoch': 1} {'type': 'loss', 'content': 0.2468605637550354, 'timestamp': '2025-09-10 02:35:19.083620', 'step': 2116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:19.136293', 'step': 2116, 'epoch': 1} {'type': 'loss', 'content': 0.20296220481395721, 'timestamp': '2025-09-10 02:35:19.138643', 'step': 2117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:19.191379', 'step': 2117, 'epoch': 1} {'type': 'loss', 'content': 0.15256400406360626, 'timestamp': '2025-09-10 02:35:19.193746', 'step': 2118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:19.247010', 'step': 2118, 'epoch': 1} {'type': 'loss', 'content': 0.23911143839359283, 'timestamp': '2025-09-10 02:35:19.249287', 'step': 2119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:19.302382', 'step': 2119, 'epoch': 1} {'type': 'loss', 'content': 0.18775159120559692, 'timestamp': '2025-09-10 02:35:19.308258', 'step': 2120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:19.360614', 'step': 2120, 'epoch': 1} {'type': 'loss', 'content': 0.13172556459903717, 'timestamp': '2025-09-10 02:35:19.362798', 'step': 2121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:19.415988', 'step': 2121, 'epoch': 1} {'type': 'loss', 'content': 0.19015946984291077, 'timestamp': '2025-09-10 02:35:19.418131', 'step': 2122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:19.471083', 'step': 2122, 'epoch': 1} {'type': 'loss', 'content': 0.1626613587141037, 'timestamp': '2025-09-10 02:35:19.473400', 'step': 2123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:19.526483', 'step': 2123, 'epoch': 1} {'type': 'loss', 'content': 0.15627819299697876, 'timestamp': '2025-09-10 02:35:19.532434', 'step': 2124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:19.586879', 'step': 2124, 'epoch': 1} {'type': 'loss', 'content': 0.16910985112190247, 'timestamp': '2025-09-10 02:35:19.589213', 'step': 2125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:19.642083', 'step': 2125, 'epoch': 1} {'type': 'loss', 'content': 0.15838861465454102, 'timestamp': '2025-09-10 02:35:19.644190', 'step': 2126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:19.698642', 'step': 2126, 'epoch': 1} {'type': 'loss', 'content': 0.1339055299758911, 'timestamp': '2025-09-10 02:35:19.700895', 'step': 2127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:19.754422', 'step': 2127, 'epoch': 1} {'type': 'loss', 'content': 0.3046063482761383, 'timestamp': '2025-09-10 02:35:19.760250', 'step': 2128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:19.812633', 'step': 2128, 'epoch': 1} {'type': 'loss', 'content': 0.265813946723938, 'timestamp': '2025-09-10 02:35:19.814787', 'step': 2129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:19.868783', 'step': 2129, 'epoch': 1} {'type': 'loss', 'content': 0.17282426357269287, 'timestamp': '2025-09-10 02:35:19.870915', 'step': 2130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:19.927966', 'step': 2130, 'epoch': 1} {'type': 'loss', 'content': 0.17203933000564575, 'timestamp': '2025-09-10 02:35:19.930074', 'step': 2131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:19.984393', 'step': 2131, 'epoch': 1} {'type': 'loss', 'content': 0.21939262747764587, 'timestamp': '2025-09-10 02:35:19.990384', 'step': 2132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:20.043070', 'step': 2132, 'epoch': 1} {'type': 'loss', 'content': 0.1258016675710678, 'timestamp': '2025-09-10 02:35:20.045388', 'step': 2133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:20.099379', 'step': 2133, 'epoch': 1} {'type': 'loss', 'content': 0.11662692576646805, 'timestamp': '2025-09-10 02:35:20.101733', 'step': 2134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:20.156151', 'step': 2134, 'epoch': 1} {'type': 'loss', 'content': 0.35198312997817993, 'timestamp': '2025-09-10 02:35:20.158457', 'step': 2135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:20.215778', 'step': 2135, 'epoch': 1} {'type': 'loss', 'content': 0.11293718963861465, 'timestamp': '2025-09-10 02:35:20.221756', 'step': 2136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:20.275119', 'step': 2136, 'epoch': 1} {'type': 'loss', 'content': 0.1770758330821991, 'timestamp': '2025-09-10 02:35:20.277324', 'step': 2137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:20.330721', 'step': 2137, 'epoch': 1} {'type': 'loss', 'content': 0.2183316946029663, 'timestamp': '2025-09-10 02:35:20.332968', 'step': 2138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:20.387714', 'step': 2138, 'epoch': 1} {'type': 'loss', 'content': 0.09887439757585526, 'timestamp': '2025-09-10 02:35:20.389897', 'step': 2139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:20.442833', 'step': 2139, 'epoch': 1} {'type': 'loss', 'content': 0.20237316191196442, 'timestamp': '2025-09-10 02:35:20.448690', 'step': 2140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:20.501089', 'step': 2140, 'epoch': 1} {'type': 'loss', 'content': 0.17240217328071594, 'timestamp': '2025-09-10 02:35:20.503296', 'step': 2141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:20.556010', 'step': 2141, 'epoch': 1} {'type': 'loss', 'content': 0.12671980261802673, 'timestamp': '2025-09-10 02:35:20.558218', 'step': 2142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:20.615496', 'step': 2142, 'epoch': 1} {'type': 'loss', 'content': 0.17676293849945068, 'timestamp': '2025-09-10 02:35:20.617647', 'step': 2143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:20.676806', 'step': 2143, 'epoch': 1} {'type': 'loss', 'content': 0.16854269802570343, 'timestamp': '2025-09-10 02:35:20.682562', 'step': 2144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:20.735334', 'step': 2144, 'epoch': 1} {'type': 'loss', 'content': 0.16217051446437836, 'timestamp': '2025-09-10 02:35:20.739091', 'step': 2145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:20.794626', 'step': 2145, 'epoch': 1} {'type': 'loss', 'content': 0.1679236888885498, 'timestamp': '2025-09-10 02:35:20.797058', 'step': 2146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:20.850258', 'step': 2146, 'epoch': 1} {'type': 'loss', 'content': 0.1373751312494278, 'timestamp': '2025-09-10 02:35:20.852631', 'step': 2147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:20.908895', 'step': 2147, 'epoch': 1} {'type': 'loss', 'content': 0.20121407508850098, 'timestamp': '2025-09-10 02:35:20.914826', 'step': 2148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:20.967223', 'step': 2148, 'epoch': 1} {'type': 'loss', 'content': 0.15415659546852112, 'timestamp': '2025-09-10 02:35:20.969340', 'step': 2149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:21.022755', 'step': 2149, 'epoch': 1} {'type': 'loss', 'content': 0.11905571818351746, 'timestamp': '2025-09-10 02:35:21.024807', 'step': 2150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:21.077677', 'step': 2150, 'epoch': 1} {'type': 'loss', 'content': 0.24977517127990723, 'timestamp': '2025-09-10 02:35:21.079692', 'step': 2151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:21.133062', 'step': 2151, 'epoch': 1} {'type': 'loss', 'content': 0.15415175259113312, 'timestamp': '2025-09-10 02:35:21.138876', 'step': 2152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:21.191598', 'step': 2152, 'epoch': 1} {'type': 'loss', 'content': 0.10744388401508331, 'timestamp': '2025-09-10 02:35:21.193813', 'step': 2153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:21.246665', 'step': 2153, 'epoch': 1} {'type': 'loss', 'content': 0.1462632715702057, 'timestamp': '2025-09-10 02:35:21.248857', 'step': 2154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:21.301719', 'step': 2154, 'epoch': 1} {'type': 'loss', 'content': 0.1492006480693817, 'timestamp': '2025-09-10 02:35:21.304102', 'step': 2155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:21.357667', 'step': 2155, 'epoch': 1} {'type': 'loss', 'content': 0.2344900220632553, 'timestamp': '2025-09-10 02:35:21.363492', 'step': 2156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:21.416347', 'step': 2156, 'epoch': 1} {'type': 'loss', 'content': 0.22801010310649872, 'timestamp': '2025-09-10 02:35:21.418778', 'step': 2157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:21.472488', 'step': 2157, 'epoch': 1} {'type': 'loss', 'content': 0.1473693698644638, 'timestamp': '2025-09-10 02:35:21.474693', 'step': 2158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:21.546292', 'step': 2158, 'epoch': 1} {'type': 'loss', 'content': 0.24818480014801025, 'timestamp': '2025-09-10 02:35:21.548356', 'step': 2159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:21.601962', 'step': 2159, 'epoch': 1} {'type': 'loss', 'content': 0.15438713133335114, 'timestamp': '2025-09-10 02:35:21.608029', 'step': 2160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:21.662861', 'step': 2160, 'epoch': 1} {'type': 'loss', 'content': 0.16489507257938385, 'timestamp': '2025-09-10 02:35:21.665258', 'step': 2161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:21.720582', 'step': 2161, 'epoch': 1} {'type': 'loss', 'content': 0.2349012792110443, 'timestamp': '2025-09-10 02:35:21.722842', 'step': 2162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:21.775872', 'step': 2162, 'epoch': 1} {'type': 'loss', 'content': 0.22996774315834045, 'timestamp': '2025-09-10 02:35:21.778118', 'step': 2163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:21.831351', 'step': 2163, 'epoch': 1} {'type': 'loss', 'content': 0.2140420377254486, 'timestamp': '2025-09-10 02:35:21.837236', 'step': 2164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:21.890480', 'step': 2164, 'epoch': 1} {'type': 'loss', 'content': 0.19570957124233246, 'timestamp': '2025-09-10 02:35:21.892676', 'step': 2165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:21.945692', 'step': 2165, 'epoch': 1} {'type': 'loss', 'content': 0.1325848549604416, 'timestamp': '2025-09-10 02:35:21.947935', 'step': 2166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:22.000592', 'step': 2166, 'epoch': 1} {'type': 'loss', 'content': 0.1722913384437561, 'timestamp': '2025-09-10 02:35:22.002816', 'step': 2167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:22.056569', 'step': 2167, 'epoch': 1} {'type': 'loss', 'content': 0.27257299423217773, 'timestamp': '2025-09-10 02:35:22.062378', 'step': 2168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:22.115751', 'step': 2168, 'epoch': 1} {'type': 'loss', 'content': 0.2150764912366867, 'timestamp': '2025-09-10 02:35:22.117923', 'step': 2169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:22.170903', 'step': 2169, 'epoch': 1} {'type': 'loss', 'content': 0.16093288362026215, 'timestamp': '2025-09-10 02:35:22.173082', 'step': 2170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:22.226745', 'step': 2170, 'epoch': 1} {'type': 'loss', 'content': 0.2379225194454193, 'timestamp': '2025-09-10 02:35:22.228938', 'step': 2171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:22.281913', 'step': 2171, 'epoch': 1} {'type': 'loss', 'content': 0.29375869035720825, 'timestamp': '2025-09-10 02:35:22.287811', 'step': 2172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:22.340462', 'step': 2172, 'epoch': 1} {'type': 'loss', 'content': 0.20718291401863098, 'timestamp': '2025-09-10 02:35:22.342670', 'step': 2173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:22.395722', 'step': 2173, 'epoch': 1} {'type': 'loss', 'content': 0.21022701263427734, 'timestamp': '2025-09-10 02:35:22.397894', 'step': 2174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:22.453757', 'step': 2174, 'epoch': 1} {'type': 'loss', 'content': 0.2455037534236908, 'timestamp': '2025-09-10 02:35:22.456307', 'step': 2175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:22.509457', 'step': 2175, 'epoch': 1} {'type': 'loss', 'content': 0.18601185083389282, 'timestamp': '2025-09-10 02:35:22.515530', 'step': 2176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:22.568239', 'step': 2176, 'epoch': 1} {'type': 'loss', 'content': 0.1443762183189392, 'timestamp': '2025-09-10 02:35:22.570709', 'step': 2177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:22.625262', 'step': 2177, 'epoch': 1} {'type': 'loss', 'content': 0.12224356085062027, 'timestamp': '2025-09-10 02:35:22.627597', 'step': 2178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:22.681055', 'step': 2178, 'epoch': 1} {'type': 'loss', 'content': 0.11685942113399506, 'timestamp': '2025-09-10 02:35:22.683337', 'step': 2179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:22.737486', 'step': 2179, 'epoch': 1} {'type': 'loss', 'content': 0.2284516990184784, 'timestamp': '2025-09-10 02:35:22.743546', 'step': 2180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:22.796297', 'step': 2180, 'epoch': 1} {'type': 'loss', 'content': 0.17757657170295715, 'timestamp': '2025-09-10 02:35:22.798556', 'step': 2181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:22.851662', 'step': 2181, 'epoch': 1} {'type': 'loss', 'content': 0.2886209487915039, 'timestamp': '2025-09-10 02:35:22.853853', 'step': 2182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:22.907239', 'step': 2182, 'epoch': 1} {'type': 'loss', 'content': 0.11546166986227036, 'timestamp': '2025-09-10 02:35:22.909438', 'step': 2183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:22.962883', 'step': 2183, 'epoch': 1} {'type': 'loss', 'content': 0.25666671991348267, 'timestamp': '2025-09-10 02:35:22.968841', 'step': 2184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:23.021907', 'step': 2184, 'epoch': 1} {'type': 'loss', 'content': 0.2643067240715027, 'timestamp': '2025-09-10 02:35:23.024160', 'step': 2185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:23.077235', 'step': 2185, 'epoch': 1} {'type': 'loss', 'content': 0.2114223688840866, 'timestamp': '2025-09-10 02:35:23.079472', 'step': 2186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:23.133408', 'step': 2186, 'epoch': 1} {'type': 'loss', 'content': 0.15434107184410095, 'timestamp': '2025-09-10 02:35:23.135604', 'step': 2187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:23.189408', 'step': 2187, 'epoch': 1} {'type': 'loss', 'content': 0.26398780941963196, 'timestamp': '2025-09-10 02:35:23.195496', 'step': 2188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:23.249094', 'step': 2188, 'epoch': 1} {'type': 'loss', 'content': 0.17031213641166687, 'timestamp': '2025-09-10 02:35:23.251526', 'step': 2189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:23.306497', 'step': 2189, 'epoch': 1} {'type': 'loss', 'content': 0.15693065524101257, 'timestamp': '2025-09-10 02:35:23.308841', 'step': 2190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:23.364517', 'step': 2190, 'epoch': 1} {'type': 'loss', 'content': 0.15275020897388458, 'timestamp': '2025-09-10 02:35:23.366659', 'step': 2191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:23.420368', 'step': 2191, 'epoch': 1} {'type': 'loss', 'content': 0.16291776299476624, 'timestamp': '2025-09-10 02:35:23.426598', 'step': 2192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:23.479976', 'step': 2192, 'epoch': 1} {'type': 'loss', 'content': 0.1589120626449585, 'timestamp': '2025-09-10 02:35:23.482302', 'step': 2193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:23.538688', 'step': 2193, 'epoch': 1} {'type': 'loss', 'content': 0.18666839599609375, 'timestamp': '2025-09-10 02:35:23.540941', 'step': 2194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:23.594092', 'step': 2194, 'epoch': 1} {'type': 'loss', 'content': 0.18874888122081757, 'timestamp': '2025-09-10 02:35:23.596889', 'step': 2195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:23.652665', 'step': 2195, 'epoch': 1} {'type': 'loss', 'content': 0.18547914922237396, 'timestamp': '2025-09-10 02:35:23.658423', 'step': 2196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:23.714022', 'step': 2196, 'epoch': 1} {'type': 'loss', 'content': 0.14530017971992493, 'timestamp': '2025-09-10 02:35:23.716202', 'step': 2197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:23.773542', 'step': 2197, 'epoch': 1} {'type': 'loss', 'content': 0.09435204416513443, 'timestamp': '2025-09-10 02:35:23.775853', 'step': 2198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:23.832905', 'step': 2198, 'epoch': 1} {'type': 'loss', 'content': 0.18348515033721924, 'timestamp': '2025-09-10 02:35:23.837563', 'step': 2199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:23.896500', 'step': 2199, 'epoch': 1} {'type': 'loss', 'content': 0.26094210147857666, 'timestamp': '2025-09-10 02:35:23.902883', 'step': 2200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:23.956789', 'step': 2200, 'epoch': 1} {'type': 'loss', 'content': 0.11982621252536774, 'timestamp': '2025-09-10 02:35:23.959139', 'step': 2201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:24.013265', 'step': 2201, 'epoch': 1} {'type': 'loss', 'content': 0.23480792343616486, 'timestamp': '2025-09-10 02:35:24.018446', 'step': 2202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:24.072652', 'step': 2202, 'epoch': 1} {'type': 'loss', 'content': 0.18718849122524261, 'timestamp': '2025-09-10 02:35:24.079673', 'step': 2203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:24.133604', 'step': 2203, 'epoch': 1} {'type': 'loss', 'content': 0.10326578468084335, 'timestamp': '2025-09-10 02:35:24.139983', 'step': 2204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:24.193483', 'step': 2204, 'epoch': 1} {'type': 'loss', 'content': 0.12499784678220749, 'timestamp': '2025-09-10 02:35:24.195795', 'step': 2205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:24.255452', 'step': 2205, 'epoch': 1} {'type': 'loss', 'content': 0.277246356010437, 'timestamp': '2025-09-10 02:35:24.257638', 'step': 2206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:24.310632', 'step': 2206, 'epoch': 1} {'type': 'loss', 'content': 0.17971983551979065, 'timestamp': '2025-09-10 02:35:24.312994', 'step': 2207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:24.368549', 'step': 2207, 'epoch': 1} {'type': 'loss', 'content': 0.2499503344297409, 'timestamp': '2025-09-10 02:35:24.374678', 'step': 2208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:24.427806', 'step': 2208, 'epoch': 1} {'type': 'loss', 'content': 0.14727158844470978, 'timestamp': '2025-09-10 02:35:24.429966', 'step': 2209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:24.483396', 'step': 2209, 'epoch': 1} {'type': 'loss', 'content': 0.1957646906375885, 'timestamp': '2025-09-10 02:35:24.485612', 'step': 2210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:24.539142', 'step': 2210, 'epoch': 1} {'type': 'loss', 'content': 0.325934499502182, 'timestamp': '2025-09-10 02:35:24.541381', 'step': 2211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:24.595584', 'step': 2211, 'epoch': 1} {'type': 'loss', 'content': 0.3245405852794647, 'timestamp': '2025-09-10 02:35:24.601678', 'step': 2212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:24.654189', 'step': 2212, 'epoch': 1} {'type': 'loss', 'content': 0.0720762386918068, 'timestamp': '2025-09-10 02:35:24.656316', 'step': 2213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:24.709291', 'step': 2213, 'epoch': 1} {'type': 'loss', 'content': 0.22537025809288025, 'timestamp': '2025-09-10 02:35:24.711489', 'step': 2214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:24.766559', 'step': 2214, 'epoch': 1} {'type': 'loss', 'content': 0.2099214345216751, 'timestamp': '2025-09-10 02:35:24.768761', 'step': 2215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:24.822400', 'step': 2215, 'epoch': 1} {'type': 'loss', 'content': 0.23855192959308624, 'timestamp': '2025-09-10 02:35:24.828322', 'step': 2216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:24.882734', 'step': 2216, 'epoch': 1} {'type': 'loss', 'content': 0.11357785016298294, 'timestamp': '2025-09-10 02:35:24.884984', 'step': 2217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:24.938829', 'step': 2217, 'epoch': 1} {'type': 'loss', 'content': 0.2639162838459015, 'timestamp': '2025-09-10 02:35:24.941257', 'step': 2218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:25.000956', 'step': 2218, 'epoch': 1} {'type': 'loss', 'content': 0.14659033715724945, 'timestamp': '2025-09-10 02:35:25.003379', 'step': 2219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:25.059802', 'step': 2219, 'epoch': 1} {'type': 'loss', 'content': 0.15587931871414185, 'timestamp': '2025-09-10 02:35:25.066373', 'step': 2220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:25.120885', 'step': 2220, 'epoch': 1} {'type': 'loss', 'content': 0.21248266100883484, 'timestamp': '2025-09-10 02:35:25.123169', 'step': 2221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:25.177189', 'step': 2221, 'epoch': 1} {'type': 'loss', 'content': 0.14084310829639435, 'timestamp': '2025-09-10 02:35:25.179429', 'step': 2222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:25.233073', 'step': 2222, 'epoch': 1} {'type': 'loss', 'content': 0.24025316536426544, 'timestamp': '2025-09-10 02:35:25.235529', 'step': 2223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:25.289491', 'step': 2223, 'epoch': 1} {'type': 'loss', 'content': 0.21011359989643097, 'timestamp': '2025-09-10 02:35:25.295945', 'step': 2224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:25.350551', 'step': 2224, 'epoch': 1} {'type': 'loss', 'content': 0.17078617215156555, 'timestamp': '2025-09-10 02:35:25.352387', 'step': 2225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:25.409603', 'step': 2225, 'epoch': 1} {'type': 'loss', 'content': 0.24143864214420319, 'timestamp': '2025-09-10 02:35:25.411890', 'step': 2226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:25.472976', 'step': 2226, 'epoch': 1} {'type': 'loss', 'content': 0.2261819839477539, 'timestamp': '2025-09-10 02:35:25.477179', 'step': 2227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:25.536720', 'step': 2227, 'epoch': 1} {'type': 'loss', 'content': 0.1359664648771286, 'timestamp': '2025-09-10 02:35:25.543471', 'step': 2228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:25.599054', 'step': 2228, 'epoch': 1} {'type': 'loss', 'content': 0.15757068991661072, 'timestamp': '2025-09-10 02:35:25.601318', 'step': 2229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:25.655637', 'step': 2229, 'epoch': 1} {'type': 'loss', 'content': 0.22604230046272278, 'timestamp': '2025-09-10 02:35:25.657830', 'step': 2230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:25.716990', 'step': 2230, 'epoch': 1} {'type': 'loss', 'content': 0.1288013905286789, 'timestamp': '2025-09-10 02:35:25.719308', 'step': 2231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:25.776378', 'step': 2231, 'epoch': 1} {'type': 'loss', 'content': 0.17238537967205048, 'timestamp': '2025-09-10 02:35:25.783093', 'step': 2232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:25.846415', 'step': 2232, 'epoch': 1} {'type': 'loss', 'content': 0.11161928623914719, 'timestamp': '2025-09-10 02:35:25.848591', 'step': 2233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:25.904231', 'step': 2233, 'epoch': 1} {'type': 'loss', 'content': 0.18717040121555328, 'timestamp': '2025-09-10 02:35:25.908315', 'step': 2234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:25.965189', 'step': 2234, 'epoch': 1} {'type': 'loss', 'content': 0.18985402584075928, 'timestamp': '2025-09-10 02:35:25.967588', 'step': 2235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:26.034746', 'step': 2235, 'epoch': 1} {'type': 'loss', 'content': 0.1760706901550293, 'timestamp': '2025-09-10 02:35:26.041663', 'step': 2236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:26.098753', 'step': 2236, 'epoch': 1} {'type': 'loss', 'content': 0.36645111441612244, 'timestamp': '2025-09-10 02:35:26.101002', 'step': 2237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:26.157180', 'step': 2237, 'epoch': 1} {'type': 'loss', 'content': 0.14008980989456177, 'timestamp': '2025-09-10 02:35:26.159385', 'step': 2238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:26.214969', 'step': 2238, 'epoch': 1} {'type': 'loss', 'content': 0.1825878620147705, 'timestamp': '2025-09-10 02:35:26.221312', 'step': 2239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:26.279518', 'step': 2239, 'epoch': 1} {'type': 'loss', 'content': 0.11541100591421127, 'timestamp': '2025-09-10 02:35:26.286116', 'step': 2240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:26.341840', 'step': 2240, 'epoch': 1} {'type': 'loss', 'content': 0.208143413066864, 'timestamp': '2025-09-10 02:35:26.344031', 'step': 2241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:26.400424', 'step': 2241, 'epoch': 1} {'type': 'loss', 'content': 0.19243714213371277, 'timestamp': '2025-09-10 02:35:26.404462', 'step': 2242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:26.460023', 'step': 2242, 'epoch': 1} {'type': 'loss', 'content': 0.23161160945892334, 'timestamp': '2025-09-10 02:35:26.462330', 'step': 2243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:26.517283', 'step': 2243, 'epoch': 1} {'type': 'loss', 'content': 0.10973599553108215, 'timestamp': '2025-09-10 02:35:26.523656', 'step': 2244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:26.577773', 'step': 2244, 'epoch': 1} {'type': 'loss', 'content': 0.2478196620941162, 'timestamp': '2025-09-10 02:35:26.579703', 'step': 2245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:26.634084', 'step': 2245, 'epoch': 1} {'type': 'loss', 'content': 0.179925799369812, 'timestamp': '2025-09-10 02:35:26.636576', 'step': 2246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:26.690832', 'step': 2246, 'epoch': 1} {'type': 'loss', 'content': 0.2348216325044632, 'timestamp': '2025-09-10 02:35:26.693197', 'step': 2247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:26.747569', 'step': 2247, 'epoch': 1} {'type': 'loss', 'content': 0.16133449971675873, 'timestamp': '2025-09-10 02:35:26.753744', 'step': 2248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:26.807489', 'step': 2248, 'epoch': 1} {'type': 'loss', 'content': 0.18614771962165833, 'timestamp': '2025-09-10 02:35:26.809771', 'step': 2249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:26.864418', 'step': 2249, 'epoch': 1} {'type': 'loss', 'content': 0.18859800696372986, 'timestamp': '2025-09-10 02:35:26.866622', 'step': 2250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:26.920972', 'step': 2250, 'epoch': 1} {'type': 'loss', 'content': 0.15124520659446716, 'timestamp': '2025-09-10 02:35:26.923282', 'step': 2251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:26.978862', 'step': 2251, 'epoch': 1} {'type': 'loss', 'content': 0.19611790776252747, 'timestamp': '2025-09-10 02:35:26.985059', 'step': 2252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:27.039417', 'step': 2252, 'epoch': 1} {'type': 'loss', 'content': 0.1901240348815918, 'timestamp': '2025-09-10 02:35:27.041511', 'step': 2253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:27.095438', 'step': 2253, 'epoch': 1} {'type': 'loss', 'content': 0.15044815838336945, 'timestamp': '2025-09-10 02:35:27.097717', 'step': 2254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:27.151988', 'step': 2254, 'epoch': 1} {'type': 'loss', 'content': 0.19805768132209778, 'timestamp': '2025-09-10 02:35:27.154300', 'step': 2255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:27.207683', 'step': 2255, 'epoch': 1} {'type': 'loss', 'content': 0.16637711226940155, 'timestamp': '2025-09-10 02:35:27.213958', 'step': 2256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:27.267497', 'step': 2256, 'epoch': 1} {'type': 'loss', 'content': 0.21531522274017334, 'timestamp': '2025-09-10 02:35:27.269645', 'step': 2257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:27.323549', 'step': 2257, 'epoch': 1} {'type': 'loss', 'content': 0.11503361910581589, 'timestamp': '2025-09-10 02:35:27.325547', 'step': 2258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:27.379466', 'step': 2258, 'epoch': 1} {'type': 'loss', 'content': 0.26147958636283875, 'timestamp': '2025-09-10 02:35:27.382771', 'step': 2259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:27.436979', 'step': 2259, 'epoch': 1} {'type': 'loss', 'content': 0.18680842220783234, 'timestamp': '2025-09-10 02:35:27.443271', 'step': 2260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:27.497001', 'step': 2260, 'epoch': 1} {'type': 'loss', 'content': 0.1854621022939682, 'timestamp': '2025-09-10 02:35:27.499140', 'step': 2261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:27.554526', 'step': 2261, 'epoch': 1} {'type': 'loss', 'content': 0.23862111568450928, 'timestamp': '2025-09-10 02:35:27.556355', 'step': 2262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:27.611025', 'step': 2262, 'epoch': 1} {'type': 'loss', 'content': 0.17663419246673584, 'timestamp': '2025-09-10 02:35:27.613383', 'step': 2263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:27.669406', 'step': 2263, 'epoch': 1} {'type': 'loss', 'content': 0.14587196707725525, 'timestamp': '2025-09-10 02:35:27.675963', 'step': 2264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:27.732345', 'step': 2264, 'epoch': 1} {'type': 'loss', 'content': 0.13715605437755585, 'timestamp': '2025-09-10 02:35:27.734464', 'step': 2265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:27.792219', 'step': 2265, 'epoch': 1} {'type': 'loss', 'content': 0.16054724156856537, 'timestamp': '2025-09-10 02:35:27.794536', 'step': 2266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:27.851007', 'step': 2266, 'epoch': 1} {'type': 'loss', 'content': 0.2025292068719864, 'timestamp': '2025-09-10 02:35:27.853387', 'step': 2267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:27.910571', 'step': 2267, 'epoch': 1} {'type': 'loss', 'content': 0.13909557461738586, 'timestamp': '2025-09-10 02:35:27.917598', 'step': 2268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:27.974921', 'step': 2268, 'epoch': 1} {'type': 'loss', 'content': 0.28909215331077576, 'timestamp': '2025-09-10 02:35:27.977149', 'step': 2269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:28.035957', 'step': 2269, 'epoch': 1} {'type': 'loss', 'content': 0.21002842485904694, 'timestamp': '2025-09-10 02:35:28.038415', 'step': 2270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:28.098572', 'step': 2270, 'epoch': 1} {'type': 'loss', 'content': 0.16692407429218292, 'timestamp': '2025-09-10 02:35:28.100747', 'step': 2271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:28.156794', 'step': 2271, 'epoch': 1} {'type': 'loss', 'content': 0.13879157602787018, 'timestamp': '2025-09-10 02:35:28.162944', 'step': 2272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:28.216766', 'step': 2272, 'epoch': 1} {'type': 'loss', 'content': 0.25497955083847046, 'timestamp': '2025-09-10 02:35:28.219098', 'step': 2273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:28.274827', 'step': 2273, 'epoch': 1} {'type': 'loss', 'content': 0.1597842127084732, 'timestamp': '2025-09-10 02:35:28.277064', 'step': 2274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:28.331190', 'step': 2274, 'epoch': 1} {'type': 'loss', 'content': 0.22106529772281647, 'timestamp': '2025-09-10 02:35:28.333396', 'step': 2275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:28.387197', 'step': 2275, 'epoch': 1} {'type': 'loss', 'content': 0.21971875429153442, 'timestamp': '2025-09-10 02:35:28.393513', 'step': 2276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:28.447186', 'step': 2276, 'epoch': 1} {'type': 'loss', 'content': 0.19812250137329102, 'timestamp': '2025-09-10 02:35:28.449579', 'step': 2277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:28.504490', 'step': 2277, 'epoch': 1} {'type': 'loss', 'content': 0.23709948360919952, 'timestamp': '2025-09-10 02:35:28.506581', 'step': 2278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:28.560325', 'step': 2278, 'epoch': 1} {'type': 'loss', 'content': 0.2504340708255768, 'timestamp': '2025-09-10 02:35:28.562140', 'step': 2279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:28.615871', 'step': 2279, 'epoch': 1} {'type': 'loss', 'content': 0.20064955949783325, 'timestamp': '2025-09-10 02:35:28.621896', 'step': 2280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:28.675254', 'step': 2280, 'epoch': 1} {'type': 'loss', 'content': 0.23313122987747192, 'timestamp': '2025-09-10 02:35:28.677460', 'step': 2281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:28.731648', 'step': 2281, 'epoch': 1} {'type': 'loss', 'content': 0.20391128957271576, 'timestamp': '2025-09-10 02:35:28.733965', 'step': 2282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:28.788118', 'step': 2282, 'epoch': 1} {'type': 'loss', 'content': 0.1703832596540451, 'timestamp': '2025-09-10 02:35:28.790337', 'step': 2283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:28.849946', 'step': 2283, 'epoch': 1} {'type': 'loss', 'content': 0.22163468599319458, 'timestamp': '2025-09-10 02:35:28.856376', 'step': 2284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:28.909726', 'step': 2284, 'epoch': 1} {'type': 'loss', 'content': 0.12231303751468658, 'timestamp': '2025-09-10 02:35:28.912047', 'step': 2285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:28.965821', 'step': 2285, 'epoch': 1} {'type': 'loss', 'content': 0.1049015000462532, 'timestamp': '2025-09-10 02:35:28.968060', 'step': 2286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:29.022937', 'step': 2286, 'epoch': 1} {'type': 'loss', 'content': 0.14663615822792053, 'timestamp': '2025-09-10 02:35:29.025205', 'step': 2287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:29.080779', 'step': 2287, 'epoch': 1} {'type': 'loss', 'content': 0.1786714345216751, 'timestamp': '2025-09-10 02:35:29.086941', 'step': 2288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:29.140636', 'step': 2288, 'epoch': 1} {'type': 'loss', 'content': 0.24378474056720734, 'timestamp': '2025-09-10 02:35:29.143316', 'step': 2289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:29.198001', 'step': 2289, 'epoch': 1} {'type': 'loss', 'content': 0.21631455421447754, 'timestamp': '2025-09-10 02:35:29.200581', 'step': 2290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:29.256201', 'step': 2290, 'epoch': 1} {'type': 'loss', 'content': 0.2603168189525604, 'timestamp': '2025-09-10 02:35:29.258605', 'step': 2291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:29.315179', 'step': 2291, 'epoch': 1} {'type': 'loss', 'content': 0.16319049894809723, 'timestamp': '2025-09-10 02:35:29.321946', 'step': 2292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:29.378662', 'step': 2292, 'epoch': 1} {'type': 'loss', 'content': 0.19380168616771698, 'timestamp': '2025-09-10 02:35:29.380879', 'step': 2293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:29.435543', 'step': 2293, 'epoch': 1} {'type': 'loss', 'content': 0.1982438564300537, 'timestamp': '2025-09-10 02:35:29.437780', 'step': 2294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:29.493199', 'step': 2294, 'epoch': 1} {'type': 'loss', 'content': 0.24079805612564087, 'timestamp': '2025-09-10 02:35:29.495640', 'step': 2295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:29.550922', 'step': 2295, 'epoch': 1} {'type': 'loss', 'content': 0.1465604454278946, 'timestamp': '2025-09-10 02:35:29.557484', 'step': 2296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:29.612596', 'step': 2296, 'epoch': 1} {'type': 'loss', 'content': 0.20208801329135895, 'timestamp': '2025-09-10 02:35:29.614772', 'step': 2297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:29.671063', 'step': 2297, 'epoch': 1} {'type': 'loss', 'content': 0.12342569231987, 'timestamp': '2025-09-10 02:35:29.673642', 'step': 2298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:29.730782', 'step': 2298, 'epoch': 1} {'type': 'loss', 'content': 0.17948122322559357, 'timestamp': '2025-09-10 02:35:29.733052', 'step': 2299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:29.788936', 'step': 2299, 'epoch': 1} {'type': 'loss', 'content': 0.2058400958776474, 'timestamp': '2025-09-10 02:35:29.795709', 'step': 2300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:29.850689', 'step': 2300, 'epoch': 1} {'type': 'loss', 'content': 0.151717409491539, 'timestamp': '2025-09-10 02:35:29.857903', 'step': 2301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:29.919680', 'step': 2301, 'epoch': 1} {'type': 'loss', 'content': 0.1711745709180832, 'timestamp': '2025-09-10 02:35:29.921869', 'step': 2302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:29.976566', 'step': 2302, 'epoch': 1} {'type': 'loss', 'content': 0.23473072052001953, 'timestamp': '2025-09-10 02:35:29.978948', 'step': 2303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:30.033131', 'step': 2303, 'epoch': 1} {'type': 'loss', 'content': 0.17696210741996765, 'timestamp': '2025-09-10 02:35:30.039498', 'step': 2304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:30.094022', 'step': 2304, 'epoch': 1} {'type': 'loss', 'content': 0.1863246113061905, 'timestamp': '2025-09-10 02:35:30.098740', 'step': 2305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:30.157528', 'step': 2305, 'epoch': 1} {'type': 'loss', 'content': 0.22932936251163483, 'timestamp': '2025-09-10 02:35:30.160048', 'step': 2306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:30.220253', 'step': 2306, 'epoch': 1} {'type': 'loss', 'content': 0.14673607051372528, 'timestamp': '2025-09-10 02:35:30.222622', 'step': 2307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:30.279460', 'step': 2307, 'epoch': 1} {'type': 'loss', 'content': 0.3222677707672119, 'timestamp': '2025-09-10 02:35:30.286363', 'step': 2308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:30.341631', 'step': 2308, 'epoch': 1} {'type': 'loss', 'content': 0.22503206133842468, 'timestamp': '2025-09-10 02:35:30.343884', 'step': 2309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:30.399994', 'step': 2309, 'epoch': 1} {'type': 'loss', 'content': 0.2142830193042755, 'timestamp': '2025-09-10 02:35:30.402549', 'step': 2310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:30.460264', 'step': 2310, 'epoch': 1} {'type': 'loss', 'content': 0.11874126642942429, 'timestamp': '2025-09-10 02:35:30.462476', 'step': 2311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:30.518961', 'step': 2311, 'epoch': 1} {'type': 'loss', 'content': 0.165425643324852, 'timestamp': '2025-09-10 02:35:30.525818', 'step': 2312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:30.581602', 'step': 2312, 'epoch': 1} {'type': 'loss', 'content': 0.13799884915351868, 'timestamp': '2025-09-10 02:35:30.583903', 'step': 2313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:30.640618', 'step': 2313, 'epoch': 1} {'type': 'loss', 'content': 0.12623877823352814, 'timestamp': '2025-09-10 02:35:30.642831', 'step': 2314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:30.699152', 'step': 2314, 'epoch': 1} {'type': 'loss', 'content': 0.1456093192100525, 'timestamp': '2025-09-10 02:35:30.701616', 'step': 2315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:30.757414', 'step': 2315, 'epoch': 1} {'type': 'loss', 'content': 0.12253115326166153, 'timestamp': '2025-09-10 02:35:30.763914', 'step': 2316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:30.819465', 'step': 2316, 'epoch': 1} {'type': 'loss', 'content': 0.13218870759010315, 'timestamp': '2025-09-10 02:35:30.821788', 'step': 2317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:30.876452', 'step': 2317, 'epoch': 1} {'type': 'loss', 'content': 0.11354227364063263, 'timestamp': '2025-09-10 02:35:30.878804', 'step': 2318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:30.933484', 'step': 2318, 'epoch': 1} {'type': 'loss', 'content': 0.17842169106006622, 'timestamp': '2025-09-10 02:35:30.935858', 'step': 2319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:30.991143', 'step': 2319, 'epoch': 1} {'type': 'loss', 'content': 0.1470305174589157, 'timestamp': '2025-09-10 02:35:30.997712', 'step': 2320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:31.051431', 'step': 2320, 'epoch': 1} {'type': 'loss', 'content': 0.25795796513557434, 'timestamp': '2025-09-10 02:35:31.053876', 'step': 2321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:31.108972', 'step': 2321, 'epoch': 1} {'type': 'loss', 'content': 0.17834240198135376, 'timestamp': '2025-09-10 02:35:31.111300', 'step': 2322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:31.165809', 'step': 2322, 'epoch': 1} {'type': 'loss', 'content': 0.1633223295211792, 'timestamp': '2025-09-10 02:35:31.169015', 'step': 2323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:31.224571', 'step': 2323, 'epoch': 1} {'type': 'loss', 'content': 0.2160007357597351, 'timestamp': '2025-09-10 02:35:31.230884', 'step': 2324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:31.284388', 'step': 2324, 'epoch': 1} {'type': 'loss', 'content': 0.22283479571342468, 'timestamp': '2025-09-10 02:35:31.286823', 'step': 2325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:31.342901', 'step': 2325, 'epoch': 1} {'type': 'loss', 'content': 0.1121387928724289, 'timestamp': '2025-09-10 02:35:31.345140', 'step': 2326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:31.401688', 'step': 2326, 'epoch': 1} {'type': 'loss', 'content': 0.1703348606824875, 'timestamp': '2025-09-10 02:35:31.404101', 'step': 2327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:31.462324', 'step': 2327, 'epoch': 1} {'type': 'loss', 'content': 0.12474876642227173, 'timestamp': '2025-09-10 02:35:31.469445', 'step': 2328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:31.528070', 'step': 2328, 'epoch': 1} {'type': 'loss', 'content': 0.16331803798675537, 'timestamp': '2025-09-10 02:35:31.530551', 'step': 2329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:31.590077', 'step': 2329, 'epoch': 1} {'type': 'loss', 'content': 0.1512262225151062, 'timestamp': '2025-09-10 02:35:31.592548', 'step': 2330, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:35:44.275543', 'step': 2330, 'epoch': 1} {'type': 'pplx', 'content': 9412.837189560425, 'timestamp': '2025-09-10 02:35:44.278528', 'step': 2330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:44.333620', 'step': 2330, 'epoch': 1} {'type': 'loss', 'content': 0.16759462654590607, 'timestamp': '2025-09-10 02:35:44.335284', 'step': 2331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:44.393192', 'step': 2331, 'epoch': 1} {'type': 'loss', 'content': 0.23364609479904175, 'timestamp': '2025-09-10 02:35:44.399700', 'step': 2332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:44.457537', 'step': 2332, 'epoch': 1} {'type': 'loss', 'content': 0.15976376831531525, 'timestamp': '2025-09-10 02:35:44.459230', 'step': 2333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:44.518846', 'step': 2333, 'epoch': 1} {'type': 'loss', 'content': 0.22113029658794403, 'timestamp': '2025-09-10 02:35:44.520609', 'step': 2334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:44.578449', 'step': 2334, 'epoch': 1} {'type': 'loss', 'content': 0.15416944026947021, 'timestamp': '2025-09-10 02:35:44.581034', 'step': 2335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:44.638260', 'step': 2335, 'epoch': 1} {'type': 'loss', 'content': 0.17233438789844513, 'timestamp': '2025-09-10 02:35:44.644518', 'step': 2336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:44.701474', 'step': 2336, 'epoch': 1} {'type': 'loss', 'content': 0.15927086770534515, 'timestamp': '2025-09-10 02:35:44.703395', 'step': 2337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:44.758267', 'step': 2337, 'epoch': 1} {'type': 'loss', 'content': 0.11842244118452072, 'timestamp': '2025-09-10 02:35:44.759917', 'step': 2338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:44.818243', 'step': 2338, 'epoch': 1} {'type': 'loss', 'content': 0.12713493406772614, 'timestamp': '2025-09-10 02:35:44.820137', 'step': 2339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:44.875883', 'step': 2339, 'epoch': 1} {'type': 'loss', 'content': 0.1617213934659958, 'timestamp': '2025-09-10 02:35:44.882400', 'step': 2340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:44.938107', 'step': 2340, 'epoch': 1} {'type': 'loss', 'content': 0.21054551005363464, 'timestamp': '2025-09-10 02:35:44.940761', 'step': 2341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:44.995592', 'step': 2341, 'epoch': 1} {'type': 'loss', 'content': 0.16167160868644714, 'timestamp': '2025-09-10 02:35:44.997242', 'step': 2342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:45.050123', 'step': 2342, 'epoch': 1} {'type': 'loss', 'content': 0.17144857347011566, 'timestamp': '2025-09-10 02:35:45.051896', 'step': 2343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:45.104853', 'step': 2343, 'epoch': 1} {'type': 'loss', 'content': 0.14505785703659058, 'timestamp': '2025-09-10 02:35:45.110615', 'step': 2344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:45.162854', 'step': 2344, 'epoch': 1} {'type': 'loss', 'content': 0.22944605350494385, 'timestamp': '2025-09-10 02:35:45.164617', 'step': 2345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:45.217562', 'step': 2345, 'epoch': 1} {'type': 'loss', 'content': 0.18326257169246674, 'timestamp': '2025-09-10 02:35:45.219720', 'step': 2346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:45.272487', 'step': 2346, 'epoch': 1} {'type': 'loss', 'content': 0.15909722447395325, 'timestamp': '2025-09-10 02:35:45.274502', 'step': 2347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:45.327265', 'step': 2347, 'epoch': 1} {'type': 'loss', 'content': 0.16049179434776306, 'timestamp': '2025-09-10 02:35:45.334326', 'step': 2348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:45.388112', 'step': 2348, 'epoch': 1} {'type': 'loss', 'content': 0.1801636964082718, 'timestamp': '2025-09-10 02:35:45.390193', 'step': 2349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:45.445100', 'step': 2349, 'epoch': 1} {'type': 'loss', 'content': 0.17568859457969666, 'timestamp': '2025-09-10 02:35:45.447216', 'step': 2350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:45.501999', 'step': 2350, 'epoch': 1} {'type': 'loss', 'content': 0.2730638384819031, 'timestamp': '2025-09-10 02:35:45.504163', 'step': 2351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:45.559264', 'step': 2351, 'epoch': 1} {'type': 'loss', 'content': 0.07166671007871628, 'timestamp': '2025-09-10 02:35:45.565673', 'step': 2352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:45.619414', 'step': 2352, 'epoch': 1} {'type': 'loss', 'content': 0.15509045124053955, 'timestamp': '2025-09-10 02:35:45.621447', 'step': 2353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:45.676901', 'step': 2353, 'epoch': 1} {'type': 'loss', 'content': 0.10040023177862167, 'timestamp': '2025-09-10 02:35:45.678929', 'step': 2354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:45.735602', 'step': 2354, 'epoch': 1} {'type': 'loss', 'content': 0.2570422291755676, 'timestamp': '2025-09-10 02:35:45.737854', 'step': 2355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:45.794779', 'step': 2355, 'epoch': 1} {'type': 'loss', 'content': 0.17036022245883942, 'timestamp': '2025-09-10 02:35:45.801474', 'step': 2356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:45.857822', 'step': 2356, 'epoch': 1} {'type': 'loss', 'content': 0.1774628907442093, 'timestamp': '2025-09-10 02:35:45.860094', 'step': 2357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:45.917247', 'step': 2357, 'epoch': 1} {'type': 'loss', 'content': 0.12831972539424896, 'timestamp': '2025-09-10 02:35:45.919666', 'step': 2358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:45.981466', 'step': 2358, 'epoch': 1} {'type': 'loss', 'content': 0.18196482956409454, 'timestamp': '2025-09-10 02:35:45.983542', 'step': 2359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:46.042596', 'step': 2359, 'epoch': 1} {'type': 'loss', 'content': 0.21271872520446777, 'timestamp': '2025-09-10 02:35:46.049121', 'step': 2360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:46.103661', 'step': 2360, 'epoch': 1} {'type': 'loss', 'content': 0.2520894706249237, 'timestamp': '2025-09-10 02:35:46.105799', 'step': 2361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:46.160991', 'step': 2361, 'epoch': 1} {'type': 'loss', 'content': 0.2069084644317627, 'timestamp': '2025-09-10 02:35:46.162801', 'step': 2362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:46.217254', 'step': 2362, 'epoch': 1} {'type': 'loss', 'content': 0.1644185334444046, 'timestamp': '2025-09-10 02:35:46.219077', 'step': 2363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:46.273431', 'step': 2363, 'epoch': 1} {'type': 'loss', 'content': 0.3405568599700928, 'timestamp': '2025-09-10 02:35:46.279577', 'step': 2364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:46.334145', 'step': 2364, 'epoch': 1} {'type': 'loss', 'content': 0.2462693154811859, 'timestamp': '2025-09-10 02:35:46.336057', 'step': 2365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:46.391973', 'step': 2365, 'epoch': 1} {'type': 'loss', 'content': 0.26906633377075195, 'timestamp': '2025-09-10 02:35:46.394047', 'step': 2366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:46.449203', 'step': 2366, 'epoch': 1} {'type': 'loss', 'content': 0.15093804895877838, 'timestamp': '2025-09-10 02:35:46.450993', 'step': 2367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:46.506481', 'step': 2367, 'epoch': 1} {'type': 'loss', 'content': 0.19669942557811737, 'timestamp': '2025-09-10 02:35:46.512876', 'step': 2368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:46.567572', 'step': 2368, 'epoch': 1} {'type': 'loss', 'content': 0.2524394094944, 'timestamp': '2025-09-10 02:35:46.569410', 'step': 2369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:46.623566', 'step': 2369, 'epoch': 1} {'type': 'loss', 'content': 0.13687251508235931, 'timestamp': '2025-09-10 02:35:46.625609', 'step': 2370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:46.679757', 'step': 2370, 'epoch': 1} {'type': 'loss', 'content': 0.12969724833965302, 'timestamp': '2025-09-10 02:35:46.681609', 'step': 2371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:46.739176', 'step': 2371, 'epoch': 1} {'type': 'loss', 'content': 0.12379957735538483, 'timestamp': '2025-09-10 02:35:46.745528', 'step': 2372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:46.801144', 'step': 2372, 'epoch': 1} {'type': 'loss', 'content': 0.193171426653862, 'timestamp': '2025-09-10 02:35:46.803190', 'step': 2373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:46.857818', 'step': 2373, 'epoch': 1} {'type': 'loss', 'content': 0.23914165794849396, 'timestamp': '2025-09-10 02:35:46.859732', 'step': 2374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:46.913564', 'step': 2374, 'epoch': 1} {'type': 'loss', 'content': 0.2543128728866577, 'timestamp': '2025-09-10 02:35:46.915362', 'step': 2375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:46.968274', 'step': 2375, 'epoch': 1} {'type': 'loss', 'content': 0.1958596557378769, 'timestamp': '2025-09-10 02:35:46.974372', 'step': 2376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:47.026804', 'step': 2376, 'epoch': 1} {'type': 'loss', 'content': 0.3113550841808319, 'timestamp': '2025-09-10 02:35:47.028605', 'step': 2377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:47.081640', 'step': 2377, 'epoch': 1} {'type': 'loss', 'content': 0.2607210874557495, 'timestamp': '2025-09-10 02:35:47.083421', 'step': 2378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:47.136792', 'step': 2378, 'epoch': 1} {'type': 'loss', 'content': 0.1558355987071991, 'timestamp': '2025-09-10 02:35:47.138597', 'step': 2379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:47.192839', 'step': 2379, 'epoch': 1} {'type': 'loss', 'content': 0.1488657295703888, 'timestamp': '2025-09-10 02:35:47.198769', 'step': 2380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:47.251397', 'step': 2380, 'epoch': 1} {'type': 'loss', 'content': 0.12796638906002045, 'timestamp': '2025-09-10 02:35:47.253159', 'step': 2381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:47.305931', 'step': 2381, 'epoch': 1} {'type': 'loss', 'content': 0.24016404151916504, 'timestamp': '2025-09-10 02:35:47.307963', 'step': 2382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:47.362840', 'step': 2382, 'epoch': 1} {'type': 'loss', 'content': 0.19751571118831635, 'timestamp': '2025-09-10 02:35:47.364655', 'step': 2383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:47.417260', 'step': 2383, 'epoch': 1} {'type': 'loss', 'content': 0.2922564148902893, 'timestamp': '2025-09-10 02:35:47.422944', 'step': 2384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:47.476026', 'step': 2384, 'epoch': 1} {'type': 'loss', 'content': 0.19996409118175507, 'timestamp': '2025-09-10 02:35:47.478896', 'step': 2385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:47.534359', 'step': 2385, 'epoch': 1} {'type': 'loss', 'content': 0.22128048539161682, 'timestamp': '2025-09-10 02:35:47.536307', 'step': 2386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:47.593235', 'step': 2386, 'epoch': 1} {'type': 'loss', 'content': 0.16750872135162354, 'timestamp': '2025-09-10 02:35:47.595213', 'step': 2387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:47.650631', 'step': 2387, 'epoch': 1} {'type': 'loss', 'content': 0.1830751895904541, 'timestamp': '2025-09-10 02:35:47.656495', 'step': 2388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:47.711006', 'step': 2388, 'epoch': 1} {'type': 'loss', 'content': 0.18703830242156982, 'timestamp': '2025-09-10 02:35:47.712817', 'step': 2389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:47.769917', 'step': 2389, 'epoch': 1} {'type': 'loss', 'content': 0.2042335867881775, 'timestamp': '2025-09-10 02:35:47.771709', 'step': 2390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:47.827076', 'step': 2390, 'epoch': 1} {'type': 'loss', 'content': 0.2498476803302765, 'timestamp': '2025-09-10 02:35:47.829104', 'step': 2391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:47.883604', 'step': 2391, 'epoch': 1} {'type': 'loss', 'content': 0.21647605299949646, 'timestamp': '2025-09-10 02:35:47.889651', 'step': 2392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:47.944189', 'step': 2392, 'epoch': 1} {'type': 'loss', 'content': 0.24956142902374268, 'timestamp': '2025-09-10 02:35:47.945932', 'step': 2393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:47.999807', 'step': 2393, 'epoch': 1} {'type': 'loss', 'content': 0.12119919061660767, 'timestamp': '2025-09-10 02:35:48.001620', 'step': 2394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:48.056328', 'step': 2394, 'epoch': 1} {'type': 'loss', 'content': 0.20386408269405365, 'timestamp': '2025-09-10 02:35:48.058131', 'step': 2395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:48.113287', 'step': 2395, 'epoch': 1} {'type': 'loss', 'content': 0.21086269617080688, 'timestamp': '2025-09-10 02:35:48.119564', 'step': 2396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:48.175550', 'step': 2396, 'epoch': 1} {'type': 'loss', 'content': 0.10307696461677551, 'timestamp': '2025-09-10 02:35:48.177446', 'step': 2397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:48.232651', 'step': 2397, 'epoch': 1} {'type': 'loss', 'content': 0.13363444805145264, 'timestamp': '2025-09-10 02:35:48.234480', 'step': 2398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:48.290132', 'step': 2398, 'epoch': 1} {'type': 'loss', 'content': 0.18393218517303467, 'timestamp': '2025-09-10 02:35:48.292363', 'step': 2399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:48.347141', 'step': 2399, 'epoch': 1} {'type': 'loss', 'content': 0.14939674735069275, 'timestamp': '2025-09-10 02:35:48.354807', 'step': 2400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:48.410166', 'step': 2400, 'epoch': 1} {'type': 'loss', 'content': 0.18219837546348572, 'timestamp': '2025-09-10 02:35:48.411983', 'step': 2401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:48.467615', 'step': 2401, 'epoch': 1} {'type': 'loss', 'content': 0.13809724152088165, 'timestamp': '2025-09-10 02:35:48.469616', 'step': 2402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:48.524335', 'step': 2402, 'epoch': 1} {'type': 'loss', 'content': 0.17950457334518433, 'timestamp': '2025-09-10 02:35:48.526426', 'step': 2403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:48.581743', 'step': 2403, 'epoch': 1} {'type': 'loss', 'content': 0.13900242745876312, 'timestamp': '2025-09-10 02:35:48.588300', 'step': 2404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:48.646219', 'step': 2404, 'epoch': 1} {'type': 'loss', 'content': 0.15101258456707, 'timestamp': '2025-09-10 02:35:48.648470', 'step': 2405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:48.707260', 'step': 2405, 'epoch': 1} {'type': 'loss', 'content': 0.1861579269170761, 'timestamp': '2025-09-10 02:35:48.709460', 'step': 2406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:48.769577', 'step': 2406, 'epoch': 1} {'type': 'loss', 'content': 0.18836644291877747, 'timestamp': '2025-09-10 02:35:48.771690', 'step': 2407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:48.828692', 'step': 2407, 'epoch': 1} {'type': 'loss', 'content': 0.12765814363956451, 'timestamp': '2025-09-10 02:35:48.835438', 'step': 2408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:48.891636', 'step': 2408, 'epoch': 1} {'type': 'loss', 'content': 0.21356812119483948, 'timestamp': '2025-09-10 02:35:48.893885', 'step': 2409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:48.948657', 'step': 2409, 'epoch': 1} {'type': 'loss', 'content': 0.21056176722049713, 'timestamp': '2025-09-10 02:35:48.950741', 'step': 2410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:49.006086', 'step': 2410, 'epoch': 1} {'type': 'loss', 'content': 0.30073848366737366, 'timestamp': '2025-09-10 02:35:49.008199', 'step': 2411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:49.063072', 'step': 2411, 'epoch': 1} {'type': 'loss', 'content': 0.23546290397644043, 'timestamp': '2025-09-10 02:35:49.069377', 'step': 2412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:49.125715', 'step': 2412, 'epoch': 1} {'type': 'loss', 'content': 0.24366667866706848, 'timestamp': '2025-09-10 02:35:49.127810', 'step': 2413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:49.183933', 'step': 2413, 'epoch': 1} {'type': 'loss', 'content': 0.20601612329483032, 'timestamp': '2025-09-10 02:35:49.186106', 'step': 2414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:49.243098', 'step': 2414, 'epoch': 1} {'type': 'loss', 'content': 0.24489951133728027, 'timestamp': '2025-09-10 02:35:49.245367', 'step': 2415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:49.301737', 'step': 2415, 'epoch': 1} {'type': 'loss', 'content': 0.2313527911901474, 'timestamp': '2025-09-10 02:35:49.308275', 'step': 2416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:49.362865', 'step': 2416, 'epoch': 1} {'type': 'loss', 'content': 0.15904343128204346, 'timestamp': '2025-09-10 02:35:49.365023', 'step': 2417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:49.420563', 'step': 2417, 'epoch': 1} {'type': 'loss', 'content': 0.27269473671913147, 'timestamp': '2025-09-10 02:35:49.422678', 'step': 2418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:49.479723', 'step': 2418, 'epoch': 1} {'type': 'loss', 'content': 0.18607144057750702, 'timestamp': '2025-09-10 02:35:49.481803', 'step': 2419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:49.537674', 'step': 2419, 'epoch': 1} {'type': 'loss', 'content': 0.18635515868663788, 'timestamp': '2025-09-10 02:35:49.544151', 'step': 2420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:49.598694', 'step': 2420, 'epoch': 1} {'type': 'loss', 'content': 0.17518600821495056, 'timestamp': '2025-09-10 02:35:49.600799', 'step': 2421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:49.655184', 'step': 2421, 'epoch': 1} {'type': 'loss', 'content': 0.24463298916816711, 'timestamp': '2025-09-10 02:35:49.657254', 'step': 2422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:49.713552', 'step': 2422, 'epoch': 1} {'type': 'loss', 'content': 0.19402410089969635, 'timestamp': '2025-09-10 02:35:49.715827', 'step': 2423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:49.773631', 'step': 2423, 'epoch': 1} {'type': 'loss', 'content': 0.13312667608261108, 'timestamp': '2025-09-10 02:35:49.780336', 'step': 2424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:49.836531', 'step': 2424, 'epoch': 1} {'type': 'loss', 'content': 0.1914098709821701, 'timestamp': '2025-09-10 02:35:49.838594', 'step': 2425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:49.894106', 'step': 2425, 'epoch': 1} {'type': 'loss', 'content': 0.18808797001838684, 'timestamp': '2025-09-10 02:35:49.896304', 'step': 2426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:49.952176', 'step': 2426, 'epoch': 1} {'type': 'loss', 'content': 0.16017870604991913, 'timestamp': '2025-09-10 02:35:49.954263', 'step': 2427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:50.009180', 'step': 2427, 'epoch': 1} {'type': 'loss', 'content': 0.26100921630859375, 'timestamp': '2025-09-10 02:35:50.015536', 'step': 2428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:50.070204', 'step': 2428, 'epoch': 1} {'type': 'loss', 'content': 0.11401253193616867, 'timestamp': '2025-09-10 02:35:50.072303', 'step': 2429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:50.128837', 'step': 2429, 'epoch': 1} {'type': 'loss', 'content': 0.12652043998241425, 'timestamp': '2025-09-10 02:35:50.131139', 'step': 2430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:50.187786', 'step': 2430, 'epoch': 1} {'type': 'loss', 'content': 0.2200973927974701, 'timestamp': '2025-09-10 02:35:50.189930', 'step': 2431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:50.245613', 'step': 2431, 'epoch': 1} {'type': 'loss', 'content': 0.18503805994987488, 'timestamp': '2025-09-10 02:35:50.252315', 'step': 2432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:50.308961', 'step': 2432, 'epoch': 1} {'type': 'loss', 'content': 0.18909859657287598, 'timestamp': '2025-09-10 02:35:50.311034', 'step': 2433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:50.367106', 'step': 2433, 'epoch': 1} {'type': 'loss', 'content': 0.12196607142686844, 'timestamp': '2025-09-10 02:35:50.369360', 'step': 2434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:50.425568', 'step': 2434, 'epoch': 1} {'type': 'loss', 'content': 0.24300043284893036, 'timestamp': '2025-09-10 02:35:50.428061', 'step': 2435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:50.484004', 'step': 2435, 'epoch': 1} {'type': 'loss', 'content': 0.11143365502357483, 'timestamp': '2025-09-10 02:35:50.490543', 'step': 2436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:50.544645', 'step': 2436, 'epoch': 1} {'type': 'loss', 'content': 0.14814360439777374, 'timestamp': '2025-09-10 02:35:50.546737', 'step': 2437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:50.601770', 'step': 2437, 'epoch': 1} {'type': 'loss', 'content': 0.14600048959255219, 'timestamp': '2025-09-10 02:35:50.603895', 'step': 2438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:50.669594', 'step': 2438, 'epoch': 1} {'type': 'loss', 'content': 0.13949128985404968, 'timestamp': '2025-09-10 02:35:50.671701', 'step': 2439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:50.727872', 'step': 2439, 'epoch': 1} {'type': 'loss', 'content': 0.18155521154403687, 'timestamp': '2025-09-10 02:35:50.734454', 'step': 2440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:50.790318', 'step': 2440, 'epoch': 1} {'type': 'loss', 'content': 0.30871686339378357, 'timestamp': '2025-09-10 02:35:50.792412', 'step': 2441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:50.848468', 'step': 2441, 'epoch': 1} {'type': 'loss', 'content': 0.21155261993408203, 'timestamp': '2025-09-10 02:35:50.850541', 'step': 2442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:50.906999', 'step': 2442, 'epoch': 1} {'type': 'loss', 'content': 0.18929731845855713, 'timestamp': '2025-09-10 02:35:50.909090', 'step': 2443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:50.964816', 'step': 2443, 'epoch': 1} {'type': 'loss', 'content': 0.26658201217651367, 'timestamp': '2025-09-10 02:35:50.972751', 'step': 2444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:51.029331', 'step': 2444, 'epoch': 1} {'type': 'loss', 'content': 0.0800623670220375, 'timestamp': '2025-09-10 02:35:51.031507', 'step': 2445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:51.086479', 'step': 2445, 'epoch': 1} {'type': 'loss', 'content': 0.21587759256362915, 'timestamp': '2025-09-10 02:35:51.089192', 'step': 2446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:51.145853', 'step': 2446, 'epoch': 1} {'type': 'loss', 'content': 0.14295393228530884, 'timestamp': '2025-09-10 02:35:51.147874', 'step': 2447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:51.201910', 'step': 2447, 'epoch': 1} {'type': 'loss', 'content': 0.18232600390911102, 'timestamp': '2025-09-10 02:35:51.207998', 'step': 2448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:51.261577', 'step': 2448, 'epoch': 1} {'type': 'loss', 'content': 0.22178813815116882, 'timestamp': '2025-09-10 02:35:51.263622', 'step': 2449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:51.316579', 'step': 2449, 'epoch': 1} {'type': 'loss', 'content': 0.18936221301555634, 'timestamp': '2025-09-10 02:35:51.318389', 'step': 2450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:51.371832', 'step': 2450, 'epoch': 1} {'type': 'loss', 'content': 0.10139092803001404, 'timestamp': '2025-09-10 02:35:51.373990', 'step': 2451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:51.428570', 'step': 2451, 'epoch': 1} {'type': 'loss', 'content': 0.24268245697021484, 'timestamp': '2025-09-10 02:35:51.434670', 'step': 2452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:51.486900', 'step': 2452, 'epoch': 1} {'type': 'loss', 'content': 0.1499968320131302, 'timestamp': '2025-09-10 02:35:51.489075', 'step': 2453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:51.542189', 'step': 2453, 'epoch': 1} {'type': 'loss', 'content': 0.13867564499378204, 'timestamp': '2025-09-10 02:35:51.544352', 'step': 2454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:51.597564', 'step': 2454, 'epoch': 1} {'type': 'loss', 'content': 0.16257400810718536, 'timestamp': '2025-09-10 02:35:51.599572', 'step': 2455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:51.652980', 'step': 2455, 'epoch': 1} {'type': 'loss', 'content': 0.1774921864271164, 'timestamp': '2025-09-10 02:35:51.658992', 'step': 2456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:51.712775', 'step': 2456, 'epoch': 1} {'type': 'loss', 'content': 0.17387792468070984, 'timestamp': '2025-09-10 02:35:51.714605', 'step': 2457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:51.767823', 'step': 2457, 'epoch': 1} {'type': 'loss', 'content': 0.15530675649642944, 'timestamp': '2025-09-10 02:35:51.769991', 'step': 2458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:51.825070', 'step': 2458, 'epoch': 1} {'type': 'loss', 'content': 0.22519707679748535, 'timestamp': '2025-09-10 02:35:51.827006', 'step': 2459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:51.883314', 'step': 2459, 'epoch': 1} {'type': 'loss', 'content': 0.1597122997045517, 'timestamp': '2025-09-10 02:35:51.889782', 'step': 2460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:51.942861', 'step': 2460, 'epoch': 1} {'type': 'loss', 'content': 0.2050696760416031, 'timestamp': '2025-09-10 02:35:51.945062', 'step': 2461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:51.998667', 'step': 2461, 'epoch': 1} {'type': 'loss', 'content': 0.166951984167099, 'timestamp': '2025-09-10 02:35:52.000687', 'step': 2462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:52.054063', 'step': 2462, 'epoch': 1} {'type': 'loss', 'content': 0.24230189621448517, 'timestamp': '2025-09-10 02:35:52.055962', 'step': 2463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:52.109483', 'step': 2463, 'epoch': 1} {'type': 'loss', 'content': 0.21731717884540558, 'timestamp': '2025-09-10 02:35:52.115262', 'step': 2464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:52.167608', 'step': 2464, 'epoch': 1} {'type': 'loss', 'content': 0.22728776931762695, 'timestamp': '2025-09-10 02:35:52.169368', 'step': 2465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:52.221889', 'step': 2465, 'epoch': 1} {'type': 'loss', 'content': 0.2018868625164032, 'timestamp': '2025-09-10 02:35:52.223740', 'step': 2466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:52.277243', 'step': 2466, 'epoch': 1} {'type': 'loss', 'content': 0.13736313581466675, 'timestamp': '2025-09-10 02:35:52.279461', 'step': 2467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:52.333278', 'step': 2467, 'epoch': 1} {'type': 'loss', 'content': 0.20892709493637085, 'timestamp': '2025-09-10 02:35:52.339000', 'step': 2468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:52.395489', 'step': 2468, 'epoch': 1} {'type': 'loss', 'content': 0.10999797284603119, 'timestamp': '2025-09-10 02:35:52.397409', 'step': 2469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:52.465238', 'step': 2469, 'epoch': 1} {'type': 'loss', 'content': 0.2966879904270172, 'timestamp': '2025-09-10 02:35:52.467326', 'step': 2470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:52.522048', 'step': 2470, 'epoch': 1} {'type': 'loss', 'content': 0.14877133071422577, 'timestamp': '2025-09-10 02:35:52.524046', 'step': 2471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:52.578226', 'step': 2471, 'epoch': 1} {'type': 'loss', 'content': 0.15346026420593262, 'timestamp': '2025-09-10 02:35:52.584400', 'step': 2472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:52.638200', 'step': 2472, 'epoch': 1} {'type': 'loss', 'content': 0.17957796156406403, 'timestamp': '2025-09-10 02:35:52.640160', 'step': 2473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:52.696267', 'step': 2473, 'epoch': 1} {'type': 'loss', 'content': 0.21462741494178772, 'timestamp': '2025-09-10 02:35:52.698224', 'step': 2474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:52.752787', 'step': 2474, 'epoch': 1} {'type': 'loss', 'content': 0.10628966987133026, 'timestamp': '2025-09-10 02:35:52.754516', 'step': 2475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:52.808452', 'step': 2475, 'epoch': 1} {'type': 'loss', 'content': 0.20093734562397003, 'timestamp': '2025-09-10 02:35:52.814522', 'step': 2476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:52.868412', 'step': 2476, 'epoch': 1} {'type': 'loss', 'content': 0.2648061215877533, 'timestamp': '2025-09-10 02:35:52.870930', 'step': 2477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:52.924475', 'step': 2477, 'epoch': 1} {'type': 'loss', 'content': 0.09913995116949081, 'timestamp': '2025-09-10 02:35:52.926563', 'step': 2478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:52.984435', 'step': 2478, 'epoch': 1} {'type': 'loss', 'content': 0.15573237836360931, 'timestamp': '2025-09-10 02:35:52.986446', 'step': 2479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:53.042719', 'step': 2479, 'epoch': 1} {'type': 'loss', 'content': 0.1202489361166954, 'timestamp': '2025-09-10 02:35:53.049023', 'step': 2480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:53.105119', 'step': 2480, 'epoch': 1} {'type': 'loss', 'content': 0.2634223401546478, 'timestamp': '2025-09-10 02:35:53.107152', 'step': 2481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:53.161904', 'step': 2481, 'epoch': 1} {'type': 'loss', 'content': 0.1478249579668045, 'timestamp': '2025-09-10 02:35:53.163909', 'step': 2482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:53.218564', 'step': 2482, 'epoch': 1} {'type': 'loss', 'content': 0.22759126126766205, 'timestamp': '2025-09-10 02:35:53.220584', 'step': 2483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:53.275370', 'step': 2483, 'epoch': 1} {'type': 'loss', 'content': 0.18285202980041504, 'timestamp': '2025-09-10 02:35:53.281451', 'step': 2484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:53.337077', 'step': 2484, 'epoch': 1} {'type': 'loss', 'content': 0.21212561428546906, 'timestamp': '2025-09-10 02:35:53.339112', 'step': 2485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:53.395301', 'step': 2485, 'epoch': 1} {'type': 'loss', 'content': 0.2534582018852234, 'timestamp': '2025-09-10 02:35:53.397382', 'step': 2486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:53.452142', 'step': 2486, 'epoch': 1} {'type': 'loss', 'content': 0.11814558506011963, 'timestamp': '2025-09-10 02:35:53.454978', 'step': 2487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:53.510286', 'step': 2487, 'epoch': 1} {'type': 'loss', 'content': 0.2724410891532898, 'timestamp': '2025-09-10 02:35:53.516412', 'step': 2488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:53.570800', 'step': 2488, 'epoch': 1} {'type': 'loss', 'content': 0.19351501762866974, 'timestamp': '2025-09-10 02:35:53.572971', 'step': 2489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:53.627099', 'step': 2489, 'epoch': 1} {'type': 'loss', 'content': 0.16717194020748138, 'timestamp': '2025-09-10 02:35:53.629009', 'step': 2490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:53.682229', 'step': 2490, 'epoch': 1} {'type': 'loss', 'content': 0.20844659209251404, 'timestamp': '2025-09-10 02:35:53.684349', 'step': 2491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:53.737811', 'step': 2491, 'epoch': 1} {'type': 'loss', 'content': 0.15839165449142456, 'timestamp': '2025-09-10 02:35:53.743845', 'step': 2492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:53.797158', 'step': 2492, 'epoch': 1} {'type': 'loss', 'content': 0.13439148664474487, 'timestamp': '2025-09-10 02:35:53.799377', 'step': 2493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:53.853287', 'step': 2493, 'epoch': 1} {'type': 'loss', 'content': 0.3087925910949707, 'timestamp': '2025-09-10 02:35:53.855587', 'step': 2494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:53.911057', 'step': 2494, 'epoch': 1} {'type': 'loss', 'content': 0.25681450963020325, 'timestamp': '2025-09-10 02:35:53.913387', 'step': 2495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:53.967685', 'step': 2495, 'epoch': 1} {'type': 'loss', 'content': 0.14349433779716492, 'timestamp': '2025-09-10 02:35:53.973837', 'step': 2496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:54.027087', 'step': 2496, 'epoch': 1} {'type': 'loss', 'content': 0.1457730233669281, 'timestamp': '2025-09-10 02:35:54.029440', 'step': 2497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:54.084348', 'step': 2497, 'epoch': 1} {'type': 'loss', 'content': 0.18961943686008453, 'timestamp': '2025-09-10 02:35:54.086706', 'step': 2498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:54.142458', 'step': 2498, 'epoch': 1} {'type': 'loss', 'content': 0.1235419288277626, 'timestamp': '2025-09-10 02:35:54.144526', 'step': 2499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:54.199673', 'step': 2499, 'epoch': 1} {'type': 'loss', 'content': 0.22351376712322235, 'timestamp': '2025-09-10 02:35:54.205994', 'step': 2500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 2500', 'timestamp': '2025-09-10 02:35:54.629941', 'step': 2500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:54.688009', 'step': 2500, 'epoch': 1} {'type': 'loss', 'content': 0.18395332992076874, 'timestamp': '2025-09-10 02:35:54.690606', 'step': 2501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:54.749940', 'step': 2501, 'epoch': 1} {'type': 'loss', 'content': 0.20682980120182037, 'timestamp': '2025-09-10 02:35:54.752306', 'step': 2502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:54.808411', 'step': 2502, 'epoch': 1} {'type': 'loss', 'content': 0.11714495718479156, 'timestamp': '2025-09-10 02:35:54.810503', 'step': 2503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:54.865282', 'step': 2503, 'epoch': 1} {'type': 'loss', 'content': 0.18913204967975616, 'timestamp': '2025-09-10 02:35:54.871757', 'step': 2504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:54.926863', 'step': 2504, 'epoch': 1} {'type': 'loss', 'content': 0.21822501718997955, 'timestamp': '2025-09-10 02:35:54.928987', 'step': 2505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:54.991081', 'step': 2505, 'epoch': 1} {'type': 'loss', 'content': 0.15685097873210907, 'timestamp': '2025-09-10 02:35:54.993436', 'step': 2506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:55.049866', 'step': 2506, 'epoch': 1} {'type': 'loss', 'content': 0.16300615668296814, 'timestamp': '2025-09-10 02:35:55.052183', 'step': 2507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:55.107558', 'step': 2507, 'epoch': 1} {'type': 'loss', 'content': 0.2672852873802185, 'timestamp': '2025-09-10 02:35:55.114108', 'step': 2508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:55.167853', 'step': 2508, 'epoch': 1} {'type': 'loss', 'content': 0.12680165469646454, 'timestamp': '2025-09-10 02:35:55.170116', 'step': 2509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:55.224462', 'step': 2509, 'epoch': 1} {'type': 'loss', 'content': 0.15552550554275513, 'timestamp': '2025-09-10 02:35:55.226680', 'step': 2510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:55.281290', 'step': 2510, 'epoch': 1} {'type': 'loss', 'content': 0.18305666744709015, 'timestamp': '2025-09-10 02:35:55.283301', 'step': 2511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:55.337849', 'step': 2511, 'epoch': 1} {'type': 'loss', 'content': 0.17315226793289185, 'timestamp': '2025-09-10 02:35:55.344263', 'step': 2512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:55.398023', 'step': 2512, 'epoch': 1} {'type': 'loss', 'content': 0.31208834052085876, 'timestamp': '2025-09-10 02:35:55.400469', 'step': 2513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:55.456119', 'step': 2513, 'epoch': 1} {'type': 'loss', 'content': 0.1925247460603714, 'timestamp': '2025-09-10 02:35:55.458316', 'step': 2514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:55.519254', 'step': 2514, 'epoch': 1} {'type': 'loss', 'content': 0.18509045243263245, 'timestamp': '2025-09-10 02:35:55.521579', 'step': 2515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:55.576326', 'step': 2515, 'epoch': 1} {'type': 'loss', 'content': 0.20595332980155945, 'timestamp': '2025-09-10 02:35:55.582543', 'step': 2516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:55.638186', 'step': 2516, 'epoch': 1} {'type': 'loss', 'content': 0.2123880237340927, 'timestamp': '2025-09-10 02:35:55.640528', 'step': 2517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:55.695090', 'step': 2517, 'epoch': 1} {'type': 'loss', 'content': 0.10929377377033234, 'timestamp': '2025-09-10 02:35:55.697491', 'step': 2518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:55.759789', 'step': 2518, 'epoch': 1} {'type': 'loss', 'content': 0.1595454216003418, 'timestamp': '2025-09-10 02:35:55.762058', 'step': 2519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:55.816565', 'step': 2519, 'epoch': 1} {'type': 'loss', 'content': 0.1538662165403366, 'timestamp': '2025-09-10 02:35:55.823210', 'step': 2520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:55.877383', 'step': 2520, 'epoch': 1} {'type': 'loss', 'content': 0.2965356707572937, 'timestamp': '2025-09-10 02:35:55.879428', 'step': 2521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:55.934264', 'step': 2521, 'epoch': 1} {'type': 'loss', 'content': 0.09865943342447281, 'timestamp': '2025-09-10 02:35:55.936549', 'step': 2522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:55.991418', 'step': 2522, 'epoch': 1} {'type': 'loss', 'content': 0.1277279555797577, 'timestamp': '2025-09-10 02:35:55.993731', 'step': 2523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:56.048982', 'step': 2523, 'epoch': 1} {'type': 'loss', 'content': 0.16448554396629333, 'timestamp': '2025-09-10 02:35:56.055497', 'step': 2524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:56.109726', 'step': 2524, 'epoch': 1} {'type': 'loss', 'content': 0.2556600272655487, 'timestamp': '2025-09-10 02:35:56.111869', 'step': 2525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:56.167703', 'step': 2525, 'epoch': 1} {'type': 'loss', 'content': 0.12966236472129822, 'timestamp': '2025-09-10 02:35:56.170005', 'step': 2526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:56.226572', 'step': 2526, 'epoch': 1} {'type': 'loss', 'content': 0.24931557476520538, 'timestamp': '2025-09-10 02:35:56.229068', 'step': 2527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:56.285728', 'step': 2527, 'epoch': 1} {'type': 'loss', 'content': 0.2538340985774994, 'timestamp': '2025-09-10 02:35:56.292569', 'step': 2528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:56.349332', 'step': 2528, 'epoch': 1} {'type': 'loss', 'content': 0.1932843029499054, 'timestamp': '2025-09-10 02:35:56.351624', 'step': 2529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:56.408478', 'step': 2529, 'epoch': 1} {'type': 'loss', 'content': 0.22006727755069733, 'timestamp': '2025-09-10 02:35:56.410786', 'step': 2530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:56.470728', 'step': 2530, 'epoch': 1} {'type': 'loss', 'content': 0.20997317135334015, 'timestamp': '2025-09-10 02:35:56.473360', 'step': 2531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:56.531225', 'step': 2531, 'epoch': 1} {'type': 'loss', 'content': 0.18141144514083862, 'timestamp': '2025-09-10 02:35:56.538375', 'step': 2532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:56.594015', 'step': 2532, 'epoch': 1} {'type': 'loss', 'content': 0.1699964702129364, 'timestamp': '2025-09-10 02:35:56.596221', 'step': 2533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:56.651916', 'step': 2533, 'epoch': 1} {'type': 'loss', 'content': 0.1505713313817978, 'timestamp': '2025-09-10 02:35:56.654107', 'step': 2534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:56.711510', 'step': 2534, 'epoch': 1} {'type': 'loss', 'content': 0.17722871899604797, 'timestamp': '2025-09-10 02:35:56.713670', 'step': 2535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:56.770484', 'step': 2535, 'epoch': 1} {'type': 'loss', 'content': 0.2191605567932129, 'timestamp': '2025-09-10 02:35:56.777479', 'step': 2536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:56.834024', 'step': 2536, 'epoch': 1} {'type': 'loss', 'content': 0.09295106679201126, 'timestamp': '2025-09-10 02:35:56.836373', 'step': 2537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:56.892914', 'step': 2537, 'epoch': 1} {'type': 'loss', 'content': 0.11603190004825592, 'timestamp': '2025-09-10 02:35:56.895027', 'step': 2538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:56.951733', 'step': 2538, 'epoch': 1} {'type': 'loss', 'content': 0.21530266106128693, 'timestamp': '2025-09-10 02:35:56.954089', 'step': 2539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:57.010483', 'step': 2539, 'epoch': 1} {'type': 'loss', 'content': 0.18876712024211884, 'timestamp': '2025-09-10 02:35:57.017423', 'step': 2540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:57.073650', 'step': 2540, 'epoch': 1} {'type': 'loss', 'content': 0.21366167068481445, 'timestamp': '2025-09-10 02:35:57.075835', 'step': 2541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:57.133254', 'step': 2541, 'epoch': 1} {'type': 'loss', 'content': 0.1158689334988594, 'timestamp': '2025-09-10 02:35:57.135546', 'step': 2542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:57.193122', 'step': 2542, 'epoch': 1} {'type': 'loss', 'content': 0.11097298562526703, 'timestamp': '2025-09-10 02:35:57.194956', 'step': 2543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:57.252826', 'step': 2543, 'epoch': 1} {'type': 'loss', 'content': 0.21661622822284698, 'timestamp': '2025-09-10 02:35:57.259658', 'step': 2544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:57.315296', 'step': 2544, 'epoch': 1} {'type': 'loss', 'content': 0.1622428447008133, 'timestamp': '2025-09-10 02:35:57.317737', 'step': 2545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:57.374223', 'step': 2545, 'epoch': 1} {'type': 'loss', 'content': 0.1696399599313736, 'timestamp': '2025-09-10 02:35:57.376557', 'step': 2546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:57.432536', 'step': 2546, 'epoch': 1} {'type': 'loss', 'content': 0.1986331045627594, 'timestamp': '2025-09-10 02:35:57.434718', 'step': 2547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:57.490825', 'step': 2547, 'epoch': 1} {'type': 'loss', 'content': 0.25263768434524536, 'timestamp': '2025-09-10 02:35:57.497589', 'step': 2548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:57.553810', 'step': 2548, 'epoch': 1} {'type': 'loss', 'content': 0.2369857132434845, 'timestamp': '2025-09-10 02:35:57.556172', 'step': 2549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:57.613104', 'step': 2549, 'epoch': 1} {'type': 'loss', 'content': 0.1297585517168045, 'timestamp': '2025-09-10 02:35:57.615505', 'step': 2550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:57.671457', 'step': 2550, 'epoch': 1} {'type': 'loss', 'content': 0.22508348524570465, 'timestamp': '2025-09-10 02:35:57.673750', 'step': 2551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:57.729612', 'step': 2551, 'epoch': 1} {'type': 'loss', 'content': 0.11092448234558105, 'timestamp': '2025-09-10 02:35:57.736541', 'step': 2552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:57.793553', 'step': 2552, 'epoch': 1} {'type': 'loss', 'content': 0.24054595828056335, 'timestamp': '2025-09-10 02:35:57.795826', 'step': 2553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:57.852699', 'step': 2553, 'epoch': 1} {'type': 'loss', 'content': 0.17637228965759277, 'timestamp': '2025-09-10 02:35:57.854919', 'step': 2554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:57.912208', 'step': 2554, 'epoch': 1} {'type': 'loss', 'content': 0.21701017022132874, 'timestamp': '2025-09-10 02:35:57.914455', 'step': 2555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:57.971999', 'step': 2555, 'epoch': 1} {'type': 'loss', 'content': 0.1657685488462448, 'timestamp': '2025-09-10 02:35:57.978661', 'step': 2556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:58.037050', 'step': 2556, 'epoch': 1} {'type': 'loss', 'content': 0.13719169795513153, 'timestamp': '2025-09-10 02:35:58.039439', 'step': 2557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:58.095475', 'step': 2557, 'epoch': 1} {'type': 'loss', 'content': 0.14437957108020782, 'timestamp': '2025-09-10 02:35:58.097589', 'step': 2558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:58.153492', 'step': 2558, 'epoch': 1} {'type': 'loss', 'content': 0.10791278630495071, 'timestamp': '2025-09-10 02:35:58.156348', 'step': 2559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:58.213093', 'step': 2559, 'epoch': 1} {'type': 'loss', 'content': 0.19667395949363708, 'timestamp': '2025-09-10 02:35:58.219667', 'step': 2560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:58.275602', 'step': 2560, 'epoch': 1} {'type': 'loss', 'content': 0.14320427179336548, 'timestamp': '2025-09-10 02:35:58.278192', 'step': 2561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:58.335817', 'step': 2561, 'epoch': 1} {'type': 'loss', 'content': 0.10116834193468094, 'timestamp': '2025-09-10 02:35:58.338187', 'step': 2562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:58.396496', 'step': 2562, 'epoch': 1} {'type': 'loss', 'content': 0.0876866802573204, 'timestamp': '2025-09-10 02:35:58.398762', 'step': 2563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:58.454808', 'step': 2563, 'epoch': 1} {'type': 'loss', 'content': 0.10368437319993973, 'timestamp': '2025-09-10 02:35:58.461602', 'step': 2564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:58.517631', 'step': 2564, 'epoch': 1} {'type': 'loss', 'content': 0.16985160112380981, 'timestamp': '2025-09-10 02:35:58.519909', 'step': 2565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:58.575941', 'step': 2565, 'epoch': 1} {'type': 'loss', 'content': 0.15083609521389008, 'timestamp': '2025-09-10 02:35:58.578244', 'step': 2566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:58.635088', 'step': 2566, 'epoch': 1} {'type': 'loss', 'content': 0.15244624018669128, 'timestamp': '2025-09-10 02:35:58.637315', 'step': 2567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:58.693744', 'step': 2567, 'epoch': 1} {'type': 'loss', 'content': 0.16374865174293518, 'timestamp': '2025-09-10 02:35:58.700254', 'step': 2568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:58.755530', 'step': 2568, 'epoch': 1} {'type': 'loss', 'content': 0.13811151683330536, 'timestamp': '2025-09-10 02:35:58.757849', 'step': 2569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:58.813540', 'step': 2569, 'epoch': 1} {'type': 'loss', 'content': 0.17913934588432312, 'timestamp': '2025-09-10 02:35:58.815889', 'step': 2570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:58.872541', 'step': 2570, 'epoch': 1} {'type': 'loss', 'content': 0.23451057076454163, 'timestamp': '2025-09-10 02:35:58.874861', 'step': 2571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:58.931264', 'step': 2571, 'epoch': 1} {'type': 'loss', 'content': 0.1339561492204666, 'timestamp': '2025-09-10 02:35:58.937887', 'step': 2572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:58.991816', 'step': 2572, 'epoch': 1} {'type': 'loss', 'content': 0.1125352755188942, 'timestamp': '2025-09-10 02:35:58.993825', 'step': 2573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:59.049249', 'step': 2573, 'epoch': 1} {'type': 'loss', 'content': 0.14667586982250214, 'timestamp': '2025-09-10 02:35:59.051724', 'step': 2574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:59.107580', 'step': 2574, 'epoch': 1} {'type': 'loss', 'content': 0.1634446382522583, 'timestamp': '2025-09-10 02:35:59.109930', 'step': 2575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:59.165622', 'step': 2575, 'epoch': 1} {'type': 'loss', 'content': 0.0907667949795723, 'timestamp': '2025-09-10 02:35:59.174603', 'step': 2576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:59.229579', 'step': 2576, 'epoch': 1} {'type': 'loss', 'content': 0.10962141305208206, 'timestamp': '2025-09-10 02:35:59.231854', 'step': 2577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:35:59.291707', 'step': 2577, 'epoch': 1} {'type': 'loss', 'content': 0.2630016505718231, 'timestamp': '2025-09-10 02:35:59.293739', 'step': 2578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:59.348221', 'step': 2578, 'epoch': 1} {'type': 'loss', 'content': 0.21086646616458893, 'timestamp': '2025-09-10 02:35:59.350487', 'step': 2579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:59.407727', 'step': 2579, 'epoch': 1} {'type': 'loss', 'content': 0.09949909895658493, 'timestamp': '2025-09-10 02:35:59.414627', 'step': 2580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:59.469630', 'step': 2580, 'epoch': 1} {'type': 'loss', 'content': 0.1762579083442688, 'timestamp': '2025-09-10 02:35:59.472269', 'step': 2581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:59.527677', 'step': 2581, 'epoch': 1} {'type': 'loss', 'content': 0.20284610986709595, 'timestamp': '2025-09-10 02:35:59.539796', 'step': 2582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:59.597847', 'step': 2582, 'epoch': 1} {'type': 'loss', 'content': 0.16036692261695862, 'timestamp': '2025-09-10 02:35:59.603505', 'step': 2583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:59.663612', 'step': 2583, 'epoch': 1} {'type': 'loss', 'content': 0.19670890271663666, 'timestamp': '2025-09-10 02:35:59.676309', 'step': 2584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:59.737761', 'step': 2584, 'epoch': 1} {'type': 'loss', 'content': 0.15567688643932343, 'timestamp': '2025-09-10 02:35:59.740065', 'step': 2585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:35:59.804899', 'step': 2585, 'epoch': 1} {'type': 'loss', 'content': 0.14623267948627472, 'timestamp': '2025-09-10 02:35:59.807169', 'step': 2586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:59.863279', 'step': 2586, 'epoch': 1} {'type': 'loss', 'content': 0.19982334971427917, 'timestamp': '2025-09-10 02:35:59.865558', 'step': 2587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:59.920500', 'step': 2587, 'epoch': 1} {'type': 'loss', 'content': 0.18377938866615295, 'timestamp': '2025-09-10 02:35:59.927192', 'step': 2588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:35:59.981772', 'step': 2588, 'epoch': 1} {'type': 'loss', 'content': 0.17718975245952606, 'timestamp': '2025-09-10 02:35:59.986990', 'step': 2589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:00.049830', 'step': 2589, 'epoch': 1} {'type': 'loss', 'content': 0.1077515259385109, 'timestamp': '2025-09-10 02:36:00.051824', 'step': 2590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:00.106867', 'step': 2590, 'epoch': 1} {'type': 'loss', 'content': 0.20711158215999603, 'timestamp': '2025-09-10 02:36:00.109228', 'step': 2591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:00.164546', 'step': 2591, 'epoch': 1} {'type': 'loss', 'content': 0.17831619083881378, 'timestamp': '2025-09-10 02:36:00.170984', 'step': 2592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:00.224943', 'step': 2592, 'epoch': 1} {'type': 'loss', 'content': 0.1574169248342514, 'timestamp': '2025-09-10 02:36:00.227082', 'step': 2593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:00.281955', 'step': 2593, 'epoch': 1} {'type': 'loss', 'content': 0.16973906755447388, 'timestamp': '2025-09-10 02:36:00.288605', 'step': 2594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:00.344059', 'step': 2594, 'epoch': 1} {'type': 'loss', 'content': 0.06234070658683777, 'timestamp': '2025-09-10 02:36:00.346359', 'step': 2595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:00.401237', 'step': 2595, 'epoch': 1} {'type': 'loss', 'content': 0.3059388995170593, 'timestamp': '2025-09-10 02:36:00.407880', 'step': 2596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:00.461786', 'step': 2596, 'epoch': 1} {'type': 'loss', 'content': 0.08831891417503357, 'timestamp': '2025-09-10 02:36:00.464050', 'step': 2597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:00.519201', 'step': 2597, 'epoch': 1} {'type': 'loss', 'content': 0.1072155237197876, 'timestamp': '2025-09-10 02:36:00.523168', 'step': 2598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:00.577512', 'step': 2598, 'epoch': 1} {'type': 'loss', 'content': 0.16578523814678192, 'timestamp': '2025-09-10 02:36:00.579601', 'step': 2599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:00.633362', 'step': 2599, 'epoch': 1} {'type': 'loss', 'content': 0.21875673532485962, 'timestamp': '2025-09-10 02:36:00.640033', 'step': 2600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:00.694316', 'step': 2600, 'epoch': 1} {'type': 'loss', 'content': 0.13861040771007538, 'timestamp': '2025-09-10 02:36:00.696114', 'step': 2601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:00.750847', 'step': 2601, 'epoch': 1} {'type': 'loss', 'content': 0.19922217726707458, 'timestamp': '2025-09-10 02:36:00.753133', 'step': 2602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:00.808962', 'step': 2602, 'epoch': 1} {'type': 'loss', 'content': 0.15424534678459167, 'timestamp': '2025-09-10 02:36:00.811376', 'step': 2603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:00.871912', 'step': 2603, 'epoch': 1} {'type': 'loss', 'content': 0.14669343829154968, 'timestamp': '2025-09-10 02:36:00.878908', 'step': 2604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:00.942760', 'step': 2604, 'epoch': 1} {'type': 'loss', 'content': 0.17499874532222748, 'timestamp': '2025-09-10 02:36:00.944858', 'step': 2605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:01.000162', 'step': 2605, 'epoch': 1} {'type': 'loss', 'content': 0.24064315855503082, 'timestamp': '2025-09-10 02:36:01.002401', 'step': 2606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:01.058125', 'step': 2606, 'epoch': 1} {'type': 'loss', 'content': 0.2480790764093399, 'timestamp': '2025-09-10 02:36:01.060290', 'step': 2607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:01.115192', 'step': 2607, 'epoch': 1} {'type': 'loss', 'content': 0.11091239750385284, 'timestamp': '2025-09-10 02:36:01.121275', 'step': 2608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:01.175663', 'step': 2608, 'epoch': 1} {'type': 'loss', 'content': 0.23875923454761505, 'timestamp': '2025-09-10 02:36:01.177855', 'step': 2609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:01.233844', 'step': 2609, 'epoch': 1} {'type': 'loss', 'content': 0.2020823210477829, 'timestamp': '2025-09-10 02:36:01.235993', 'step': 2610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:01.295476', 'step': 2610, 'epoch': 1} {'type': 'loss', 'content': 0.13291804492473602, 'timestamp': '2025-09-10 02:36:01.297441', 'step': 2611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:01.351858', 'step': 2611, 'epoch': 1} {'type': 'loss', 'content': 0.1458457112312317, 'timestamp': '2025-09-10 02:36:01.357969', 'step': 2612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:01.411726', 'step': 2612, 'epoch': 1} {'type': 'loss', 'content': 0.1753721535205841, 'timestamp': '2025-09-10 02:36:01.413667', 'step': 2613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:01.466883', 'step': 2613, 'epoch': 1} {'type': 'loss', 'content': 0.11737170070409775, 'timestamp': '2025-09-10 02:36:01.468864', 'step': 2614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:01.523029', 'step': 2614, 'epoch': 1} {'type': 'loss', 'content': 0.1308935582637787, 'timestamp': '2025-09-10 02:36:01.525219', 'step': 2615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:01.578673', 'step': 2615, 'epoch': 1} {'type': 'loss', 'content': 0.283849835395813, 'timestamp': '2025-09-10 02:36:01.584719', 'step': 2616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:01.638809', 'step': 2616, 'epoch': 1} {'type': 'loss', 'content': 0.30180272459983826, 'timestamp': '2025-09-10 02:36:01.641056', 'step': 2617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:01.695243', 'step': 2617, 'epoch': 1} {'type': 'loss', 'content': 0.1522683948278427, 'timestamp': '2025-09-10 02:36:01.697501', 'step': 2618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:01.753124', 'step': 2618, 'epoch': 1} {'type': 'loss', 'content': 0.17775900661945343, 'timestamp': '2025-09-10 02:36:01.755229', 'step': 2619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:36:01.808666', 'step': 2619, 'epoch': 1} {'type': 'loss', 'content': 0.21686919033527374, 'timestamp': '2025-09-10 02:36:01.814793', 'step': 2620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:01.868214', 'step': 2620, 'epoch': 1} {'type': 'loss', 'content': 0.1419214904308319, 'timestamp': '2025-09-10 02:36:01.870307', 'step': 2621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:01.924075', 'step': 2621, 'epoch': 1} {'type': 'loss', 'content': 0.11183283478021622, 'timestamp': '2025-09-10 02:36:01.926115', 'step': 2622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:01.979141', 'step': 2622, 'epoch': 1} {'type': 'loss', 'content': 0.14022581279277802, 'timestamp': '2025-09-10 02:36:01.981060', 'step': 2623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:02.034391', 'step': 2623, 'epoch': 1} {'type': 'loss', 'content': 0.2527908980846405, 'timestamp': '2025-09-10 02:36:02.040323', 'step': 2624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:02.093180', 'step': 2624, 'epoch': 1} {'type': 'loss', 'content': 0.11902642250061035, 'timestamp': '2025-09-10 02:36:02.095224', 'step': 2625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:02.149712', 'step': 2625, 'epoch': 1} {'type': 'loss', 'content': 0.12755075097084045, 'timestamp': '2025-09-10 02:36:02.151782', 'step': 2626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:02.204527', 'step': 2626, 'epoch': 1} {'type': 'loss', 'content': 0.18312859535217285, 'timestamp': '2025-09-10 02:36:02.206715', 'step': 2627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:02.260439', 'step': 2627, 'epoch': 1} {'type': 'loss', 'content': 0.1617371290922165, 'timestamp': '2025-09-10 02:36:02.266261', 'step': 2628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:36:02.319825', 'step': 2628, 'epoch': 1} {'type': 'loss', 'content': 0.192702516913414, 'timestamp': '2025-09-10 02:36:02.321810', 'step': 2629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:02.377882', 'step': 2629, 'epoch': 1} {'type': 'loss', 'content': 0.0957770124077797, 'timestamp': '2025-09-10 02:36:02.379980', 'step': 2630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:02.437120', 'step': 2630, 'epoch': 1} {'type': 'loss', 'content': 0.22392643988132477, 'timestamp': '2025-09-10 02:36:02.439418', 'step': 2631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:02.497205', 'step': 2631, 'epoch': 1} {'type': 'loss', 'content': 0.15605579316616058, 'timestamp': '2025-09-10 02:36:02.504088', 'step': 2632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:02.563104', 'step': 2632, 'epoch': 1} {'type': 'loss', 'content': 0.2635246813297272, 'timestamp': '2025-09-10 02:36:02.565473', 'step': 2633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:02.623183', 'step': 2633, 'epoch': 1} {'type': 'loss', 'content': 0.15611481666564941, 'timestamp': '2025-09-10 02:36:02.625177', 'step': 2634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:02.682969', 'step': 2634, 'epoch': 1} {'type': 'loss', 'content': 0.18959158658981323, 'timestamp': '2025-09-10 02:36:02.685031', 'step': 2635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:02.741375', 'step': 2635, 'epoch': 1} {'type': 'loss', 'content': 0.20647776126861572, 'timestamp': '2025-09-10 02:36:02.748156', 'step': 2636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:02.805401', 'step': 2636, 'epoch': 1} {'type': 'loss', 'content': 0.13693949580192566, 'timestamp': '2025-09-10 02:36:02.806978', 'step': 2637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:02.863048', 'step': 2637, 'epoch': 1} {'type': 'loss', 'content': 0.15402960777282715, 'timestamp': '2025-09-10 02:36:02.865116', 'step': 2638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:02.922913', 'step': 2638, 'epoch': 1} {'type': 'loss', 'content': 0.15493972599506378, 'timestamp': '2025-09-10 02:36:02.924917', 'step': 2639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:02.980922', 'step': 2639, 'epoch': 1} {'type': 'loss', 'content': 0.1145656630396843, 'timestamp': '2025-09-10 02:36:02.987264', 'step': 2640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:03.041677', 'step': 2640, 'epoch': 1} {'type': 'loss', 'content': 0.27954888343811035, 'timestamp': '2025-09-10 02:36:03.043758', 'step': 2641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:03.099032', 'step': 2641, 'epoch': 1} {'type': 'loss', 'content': 0.20157849788665771, 'timestamp': '2025-09-10 02:36:03.101138', 'step': 2642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:03.157675', 'step': 2642, 'epoch': 1} {'type': 'loss', 'content': 0.12889878451824188, 'timestamp': '2025-09-10 02:36:03.161734', 'step': 2643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:03.217306', 'step': 2643, 'epoch': 1} {'type': 'loss', 'content': 0.09924361854791641, 'timestamp': '2025-09-10 02:36:03.223411', 'step': 2644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:03.281257', 'step': 2644, 'epoch': 1} {'type': 'loss', 'content': 0.1412847340106964, 'timestamp': '2025-09-10 02:36:03.283505', 'step': 2645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:03.343841', 'step': 2645, 'epoch': 1} {'type': 'loss', 'content': 0.13279938697814941, 'timestamp': '2025-09-10 02:36:03.348678', 'step': 2646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:03.414368', 'step': 2646, 'epoch': 1} {'type': 'loss', 'content': 0.13175828754901886, 'timestamp': '2025-09-10 02:36:03.416743', 'step': 2647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:03.478166', 'step': 2647, 'epoch': 1} {'type': 'loss', 'content': 0.150616854429245, 'timestamp': '2025-09-10 02:36:03.485347', 'step': 2648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:03.546347', 'step': 2648, 'epoch': 1} {'type': 'loss', 'content': 0.120772585272789, 'timestamp': '2025-09-10 02:36:03.548480', 'step': 2649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:03.606639', 'step': 2649, 'epoch': 1} {'type': 'loss', 'content': 0.23037968575954437, 'timestamp': '2025-09-10 02:36:03.608667', 'step': 2650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:03.663880', 'step': 2650, 'epoch': 1} {'type': 'loss', 'content': 0.23752717673778534, 'timestamp': '2025-09-10 02:36:03.667747', 'step': 2651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:03.721154', 'step': 2651, 'epoch': 1} {'type': 'loss', 'content': 0.21206390857696533, 'timestamp': '2025-09-10 02:36:03.727559', 'step': 2652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:03.781121', 'step': 2652, 'epoch': 1} {'type': 'loss', 'content': 0.1435585618019104, 'timestamp': '2025-09-10 02:36:03.783081', 'step': 2653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:03.837001', 'step': 2653, 'epoch': 1} {'type': 'loss', 'content': 0.09629498422145844, 'timestamp': '2025-09-10 02:36:03.839192', 'step': 2654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:03.896891', 'step': 2654, 'epoch': 1} {'type': 'loss', 'content': 0.2608509063720703, 'timestamp': '2025-09-10 02:36:03.898908', 'step': 2655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:03.954298', 'step': 2655, 'epoch': 1} {'type': 'loss', 'content': 0.15526558458805084, 'timestamp': '2025-09-10 02:36:03.961895', 'step': 2656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:04.014976', 'step': 2656, 'epoch': 1} {'type': 'loss', 'content': 0.12389615178108215, 'timestamp': '2025-09-10 02:36:04.017305', 'step': 2657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:04.071107', 'step': 2657, 'epoch': 1} {'type': 'loss', 'content': 0.1454898864030838, 'timestamp': '2025-09-10 02:36:04.073035', 'step': 2658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:04.126506', 'step': 2658, 'epoch': 1} {'type': 'loss', 'content': 0.17576806247234344, 'timestamp': '2025-09-10 02:36:04.128665', 'step': 2659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:04.181607', 'step': 2659, 'epoch': 1} {'type': 'loss', 'content': 0.1910039484500885, 'timestamp': '2025-09-10 02:36:04.192781', 'step': 2660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:04.249829', 'step': 2660, 'epoch': 1} {'type': 'loss', 'content': 0.2770267426967621, 'timestamp': '2025-09-10 02:36:04.256193', 'step': 2661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:04.315978', 'step': 2661, 'epoch': 1} {'type': 'loss', 'content': 0.18480731546878815, 'timestamp': '2025-09-10 02:36:04.319135', 'step': 2662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:04.373405', 'step': 2662, 'epoch': 1} {'type': 'loss', 'content': 0.1427505612373352, 'timestamp': '2025-09-10 02:36:04.375380', 'step': 2663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:04.432222', 'step': 2663, 'epoch': 1} {'type': 'loss', 'content': 0.16071468591690063, 'timestamp': '2025-09-10 02:36:04.438819', 'step': 2664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:04.492677', 'step': 2664, 'epoch': 1} {'type': 'loss', 'content': 0.1966647505760193, 'timestamp': '2025-09-10 02:36:04.500729', 'step': 2665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:04.558750', 'step': 2665, 'epoch': 1} {'type': 'loss', 'content': 0.128361314535141, 'timestamp': '2025-09-10 02:36:04.560851', 'step': 2666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:04.614174', 'step': 2666, 'epoch': 1} {'type': 'loss', 'content': 0.18545958399772644, 'timestamp': '2025-09-10 02:36:04.616366', 'step': 2667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:04.670968', 'step': 2667, 'epoch': 1} {'type': 'loss', 'content': 0.2004694640636444, 'timestamp': '2025-09-10 02:36:04.677859', 'step': 2668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:04.731863', 'step': 2668, 'epoch': 1} {'type': 'loss', 'content': 0.22571112215518951, 'timestamp': '2025-09-10 02:36:04.733822', 'step': 2669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:04.787078', 'step': 2669, 'epoch': 1} {'type': 'loss', 'content': 0.12562933564186096, 'timestamp': '2025-09-10 02:36:04.788981', 'step': 2670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:04.842697', 'step': 2670, 'epoch': 1} {'type': 'loss', 'content': 0.1865241974592209, 'timestamp': '2025-09-10 02:36:04.846400', 'step': 2671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:04.899608', 'step': 2671, 'epoch': 1} {'type': 'loss', 'content': 0.12180282920598984, 'timestamp': '2025-09-10 02:36:04.911758', 'step': 2672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:04.968692', 'step': 2672, 'epoch': 1} {'type': 'loss', 'content': 0.14133043587207794, 'timestamp': '2025-09-10 02:36:04.970744', 'step': 2673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:05.024411', 'step': 2673, 'epoch': 1} {'type': 'loss', 'content': 0.12982967495918274, 'timestamp': '2025-09-10 02:36:05.026464', 'step': 2674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:05.080700', 'step': 2674, 'epoch': 1} {'type': 'loss', 'content': 0.19968020915985107, 'timestamp': '2025-09-10 02:36:05.083600', 'step': 2675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:05.137327', 'step': 2675, 'epoch': 1} {'type': 'loss', 'content': 0.11993713676929474, 'timestamp': '2025-09-10 02:36:05.143442', 'step': 2676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:05.196076', 'step': 2676, 'epoch': 1} {'type': 'loss', 'content': 0.1472616046667099, 'timestamp': '2025-09-10 02:36:05.197845', 'step': 2677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:05.250378', 'step': 2677, 'epoch': 1} {'type': 'loss', 'content': 0.10934332758188248, 'timestamp': '2025-09-10 02:36:05.252228', 'step': 2678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:05.305170', 'step': 2678, 'epoch': 1} {'type': 'loss', 'content': 0.1816149353981018, 'timestamp': '2025-09-10 02:36:05.307220', 'step': 2679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:05.360712', 'step': 2679, 'epoch': 1} {'type': 'loss', 'content': 0.1768900454044342, 'timestamp': '2025-09-10 02:36:05.366741', 'step': 2680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:05.420055', 'step': 2680, 'epoch': 1} {'type': 'loss', 'content': 0.10461454093456268, 'timestamp': '2025-09-10 02:36:05.422121', 'step': 2681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:05.476297', 'step': 2681, 'epoch': 1} {'type': 'loss', 'content': 0.19343960285186768, 'timestamp': '2025-09-10 02:36:05.478450', 'step': 2682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:05.532177', 'step': 2682, 'epoch': 1} {'type': 'loss', 'content': 0.179607093334198, 'timestamp': '2025-09-10 02:36:05.534304', 'step': 2683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:05.588577', 'step': 2683, 'epoch': 1} {'type': 'loss', 'content': 0.19699445366859436, 'timestamp': '2025-09-10 02:36:05.594543', 'step': 2684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:05.647316', 'step': 2684, 'epoch': 1} {'type': 'loss', 'content': 0.13078464567661285, 'timestamp': '2025-09-10 02:36:05.649422', 'step': 2685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:05.702561', 'step': 2685, 'epoch': 1} {'type': 'loss', 'content': 0.1913040280342102, 'timestamp': '2025-09-10 02:36:05.704700', 'step': 2686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:05.758114', 'step': 2686, 'epoch': 1} {'type': 'loss', 'content': 0.141615629196167, 'timestamp': '2025-09-10 02:36:05.760264', 'step': 2687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:05.813689', 'step': 2687, 'epoch': 1} {'type': 'loss', 'content': 0.12714307010173798, 'timestamp': '2025-09-10 02:36:05.819534', 'step': 2688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:05.872224', 'step': 2688, 'epoch': 1} {'type': 'loss', 'content': 0.13969413936138153, 'timestamp': '2025-09-10 02:36:05.874410', 'step': 2689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:05.928121', 'step': 2689, 'epoch': 1} {'type': 'loss', 'content': 0.1641124039888382, 'timestamp': '2025-09-10 02:36:05.930305', 'step': 2690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:05.985212', 'step': 2690, 'epoch': 1} {'type': 'loss', 'content': 0.19056129455566406, 'timestamp': '2025-09-10 02:36:05.987375', 'step': 2691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:06.041393', 'step': 2691, 'epoch': 1} {'type': 'loss', 'content': 0.21744993329048157, 'timestamp': '2025-09-10 02:36:06.047453', 'step': 2692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:06.101444', 'step': 2692, 'epoch': 1} {'type': 'loss', 'content': 0.20578478276729584, 'timestamp': '2025-09-10 02:36:06.103424', 'step': 2693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:06.157491', 'step': 2693, 'epoch': 1} {'type': 'loss', 'content': 0.1739773452281952, 'timestamp': '2025-09-10 02:36:06.159461', 'step': 2694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:06.213517', 'step': 2694, 'epoch': 1} {'type': 'loss', 'content': 0.1452953964471817, 'timestamp': '2025-09-10 02:36:06.215644', 'step': 2695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:06.269254', 'step': 2695, 'epoch': 1} {'type': 'loss', 'content': 0.13824865221977234, 'timestamp': '2025-09-10 02:36:06.275003', 'step': 2696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:06.328846', 'step': 2696, 'epoch': 1} {'type': 'loss', 'content': 0.12060519307851791, 'timestamp': '2025-09-10 02:36:06.330687', 'step': 2697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:06.384692', 'step': 2697, 'epoch': 1} {'type': 'loss', 'content': 0.17261649668216705, 'timestamp': '2025-09-10 02:36:06.386944', 'step': 2698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:06.441302', 'step': 2698, 'epoch': 1} {'type': 'loss', 'content': 0.20244422554969788, 'timestamp': '2025-09-10 02:36:06.443235', 'step': 2699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:06.498008', 'step': 2699, 'epoch': 1} {'type': 'loss', 'content': 0.20425185561180115, 'timestamp': '2025-09-10 02:36:06.503651', 'step': 2700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:06.557719', 'step': 2700, 'epoch': 1} {'type': 'loss', 'content': 0.12035791575908661, 'timestamp': '2025-09-10 02:36:06.559070', 'step': 2701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:06.611906', 'step': 2701, 'epoch': 1} {'type': 'loss', 'content': 0.1922057718038559, 'timestamp': '2025-09-10 02:36:06.613258', 'step': 2702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:06.668011', 'step': 2702, 'epoch': 1} {'type': 'loss', 'content': 0.271314799785614, 'timestamp': '2025-09-10 02:36:06.669895', 'step': 2703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:06.724333', 'step': 2703, 'epoch': 1} {'type': 'loss', 'content': 0.15798617899417877, 'timestamp': '2025-09-10 02:36:06.730528', 'step': 2704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:06.783199', 'step': 2704, 'epoch': 1} {'type': 'loss', 'content': 0.21104241907596588, 'timestamp': '2025-09-10 02:36:06.784920', 'step': 2705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:06.837814', 'step': 2705, 'epoch': 1} {'type': 'loss', 'content': 0.1623859703540802, 'timestamp': '2025-09-10 02:36:06.839658', 'step': 2706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:06.893232', 'step': 2706, 'epoch': 1} {'type': 'loss', 'content': 0.2603214383125305, 'timestamp': '2025-09-10 02:36:06.895344', 'step': 2707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:36:06.948142', 'step': 2707, 'epoch': 1} {'type': 'loss', 'content': 0.20116645097732544, 'timestamp': '2025-09-10 02:36:06.954349', 'step': 2708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:07.007981', 'step': 2708, 'epoch': 1} {'type': 'loss', 'content': 0.21278157830238342, 'timestamp': '2025-09-10 02:36:07.009972', 'step': 2709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:07.064493', 'step': 2709, 'epoch': 1} {'type': 'loss', 'content': 0.2401609718799591, 'timestamp': '2025-09-10 02:36:07.066396', 'step': 2710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:07.122254', 'step': 2710, 'epoch': 1} {'type': 'loss', 'content': 0.17158901691436768, 'timestamp': '2025-09-10 02:36:07.123935', 'step': 2711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:07.177818', 'step': 2711, 'epoch': 1} {'type': 'loss', 'content': 0.21038319170475006, 'timestamp': '2025-09-10 02:36:07.183786', 'step': 2712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:07.237727', 'step': 2712, 'epoch': 1} {'type': 'loss', 'content': 0.2117350846529007, 'timestamp': '2025-09-10 02:36:07.239563', 'step': 2713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:07.293508', 'step': 2713, 'epoch': 1} {'type': 'loss', 'content': 0.14182370901107788, 'timestamp': '2025-09-10 02:36:07.295388', 'step': 2714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:07.347876', 'step': 2714, 'epoch': 1} {'type': 'loss', 'content': 0.18408112227916718, 'timestamp': '2025-09-10 02:36:07.350048', 'step': 2715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:07.403487', 'step': 2715, 'epoch': 1} {'type': 'loss', 'content': 0.11849101632833481, 'timestamp': '2025-09-10 02:36:07.409172', 'step': 2716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:07.463268', 'step': 2716, 'epoch': 1} {'type': 'loss', 'content': 0.14598031342029572, 'timestamp': '2025-09-10 02:36:07.464984', 'step': 2717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:07.518949', 'step': 2717, 'epoch': 1} {'type': 'loss', 'content': 0.20471301674842834, 'timestamp': '2025-09-10 02:36:07.520724', 'step': 2718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:07.574314', 'step': 2718, 'epoch': 1} {'type': 'loss', 'content': 0.136116623878479, 'timestamp': '2025-09-10 02:36:07.576195', 'step': 2719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:07.630269', 'step': 2719, 'epoch': 1} {'type': 'loss', 'content': 0.2929539084434509, 'timestamp': '2025-09-10 02:36:07.635889', 'step': 2720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:07.691160', 'step': 2720, 'epoch': 1} {'type': 'loss', 'content': 0.22519925236701965, 'timestamp': '2025-09-10 02:36:07.694093', 'step': 2721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:07.748696', 'step': 2721, 'epoch': 1} {'type': 'loss', 'content': 0.1617838442325592, 'timestamp': '2025-09-10 02:36:07.750435', 'step': 2722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:07.803764', 'step': 2722, 'epoch': 1} {'type': 'loss', 'content': 0.19136670231819153, 'timestamp': '2025-09-10 02:36:07.805781', 'step': 2723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:07.859163', 'step': 2723, 'epoch': 1} {'type': 'loss', 'content': 0.20333708822727203, 'timestamp': '2025-09-10 02:36:07.864845', 'step': 2724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:07.917301', 'step': 2724, 'epoch': 1} {'type': 'loss', 'content': 0.21774989366531372, 'timestamp': '2025-09-10 02:36:07.919271', 'step': 2725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:07.973201', 'step': 2725, 'epoch': 1} {'type': 'loss', 'content': 0.12699522078037262, 'timestamp': '2025-09-10 02:36:07.974908', 'step': 2726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:08.029200', 'step': 2726, 'epoch': 1} {'type': 'loss', 'content': 0.17904877662658691, 'timestamp': '2025-09-10 02:36:08.030903', 'step': 2727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:08.085528', 'step': 2727, 'epoch': 1} {'type': 'loss', 'content': 0.2559056580066681, 'timestamp': '2025-09-10 02:36:08.091066', 'step': 2728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:08.144883', 'step': 2728, 'epoch': 1} {'type': 'loss', 'content': 0.14264483749866486, 'timestamp': '2025-09-10 02:36:08.147123', 'step': 2729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:08.202140', 'step': 2729, 'epoch': 1} {'type': 'loss', 'content': 0.15003813803195953, 'timestamp': '2025-09-10 02:36:08.204214', 'step': 2730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:08.261094', 'step': 2730, 'epoch': 1} {'type': 'loss', 'content': 0.24753817915916443, 'timestamp': '2025-09-10 02:36:08.262884', 'step': 2731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:08.316752', 'step': 2731, 'epoch': 1} {'type': 'loss', 'content': 0.16715551912784576, 'timestamp': '2025-09-10 02:36:08.322677', 'step': 2732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:08.375419', 'step': 2732, 'epoch': 1} {'type': 'loss', 'content': 0.16065210103988647, 'timestamp': '2025-09-10 02:36:08.377402', 'step': 2733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:08.432407', 'step': 2733, 'epoch': 1} {'type': 'loss', 'content': 0.1906631588935852, 'timestamp': '2025-09-10 02:36:08.434273', 'step': 2734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:08.489318', 'step': 2734, 'epoch': 1} {'type': 'loss', 'content': 0.20568348467350006, 'timestamp': '2025-09-10 02:36:08.491047', 'step': 2735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:08.544727', 'step': 2735, 'epoch': 1} {'type': 'loss', 'content': 0.1850658506155014, 'timestamp': '2025-09-10 02:36:08.550458', 'step': 2736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:08.603897', 'step': 2736, 'epoch': 1} {'type': 'loss', 'content': 0.14952629804611206, 'timestamp': '2025-09-10 02:36:08.605822', 'step': 2737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:08.659229', 'step': 2737, 'epoch': 1} {'type': 'loss', 'content': 0.20800556242465973, 'timestamp': '2025-09-10 02:36:08.661310', 'step': 2738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:08.715710', 'step': 2738, 'epoch': 1} {'type': 'loss', 'content': 0.12355461716651917, 'timestamp': '2025-09-10 02:36:08.717838', 'step': 2739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:08.772306', 'step': 2739, 'epoch': 1} {'type': 'loss', 'content': 0.18873704969882965, 'timestamp': '2025-09-10 02:36:08.778331', 'step': 2740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:08.831297', 'step': 2740, 'epoch': 1} {'type': 'loss', 'content': 0.17613506317138672, 'timestamp': '2025-09-10 02:36:08.833467', 'step': 2741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:08.886721', 'step': 2741, 'epoch': 1} {'type': 'loss', 'content': 0.11536797136068344, 'timestamp': '2025-09-10 02:36:08.888427', 'step': 2742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:08.942025', 'step': 2742, 'epoch': 1} {'type': 'loss', 'content': 0.13622123003005981, 'timestamp': '2025-09-10 02:36:08.943877', 'step': 2743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:36:08.997799', 'step': 2743, 'epoch': 1} {'type': 'loss', 'content': 0.1606004387140274, 'timestamp': '2025-09-10 02:36:09.003456', 'step': 2744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:09.057748', 'step': 2744, 'epoch': 1} {'type': 'loss', 'content': 0.24035917222499847, 'timestamp': '2025-09-10 02:36:09.059649', 'step': 2745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:09.113707', 'step': 2745, 'epoch': 1} {'type': 'loss', 'content': 0.14590615034103394, 'timestamp': '2025-09-10 02:36:09.115445', 'step': 2746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:09.170733', 'step': 2746, 'epoch': 1} {'type': 'loss', 'content': 0.14918826520442963, 'timestamp': '2025-09-10 02:36:09.173029', 'step': 2747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:09.227940', 'step': 2747, 'epoch': 1} {'type': 'loss', 'content': 0.15624472498893738, 'timestamp': '2025-09-10 02:36:09.234178', 'step': 2748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:09.287962', 'step': 2748, 'epoch': 1} {'type': 'loss', 'content': 0.20661072432994843, 'timestamp': '2025-09-10 02:36:09.290086', 'step': 2749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:09.343527', 'step': 2749, 'epoch': 1} {'type': 'loss', 'content': 0.12014264613389969, 'timestamp': '2025-09-10 02:36:09.345800', 'step': 2750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:09.399343', 'step': 2750, 'epoch': 1} {'type': 'loss', 'content': 0.23372459411621094, 'timestamp': '2025-09-10 02:36:09.401054', 'step': 2751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:09.454689', 'step': 2751, 'epoch': 1} {'type': 'loss', 'content': 0.11229756474494934, 'timestamp': '2025-09-10 02:36:09.460249', 'step': 2752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:36:09.513238', 'step': 2752, 'epoch': 1} {'type': 'loss', 'content': 0.22383396327495575, 'timestamp': '2025-09-10 02:36:09.515112', 'step': 2753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:09.568386', 'step': 2753, 'epoch': 1} {'type': 'loss', 'content': 0.256911963224411, 'timestamp': '2025-09-10 02:36:09.570010', 'step': 2754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:09.623943', 'step': 2754, 'epoch': 1} {'type': 'loss', 'content': 0.21594980359077454, 'timestamp': '2025-09-10 02:36:09.626125', 'step': 2755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:09.679614', 'step': 2755, 'epoch': 1} {'type': 'loss', 'content': 0.12504547834396362, 'timestamp': '2025-09-10 02:36:09.685621', 'step': 2756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:09.739164', 'step': 2756, 'epoch': 1} {'type': 'loss', 'content': 0.1738535761833191, 'timestamp': '2025-09-10 02:36:09.741129', 'step': 2757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:09.795103', 'step': 2757, 'epoch': 1} {'type': 'loss', 'content': 0.20419782400131226, 'timestamp': '2025-09-10 02:36:09.797174', 'step': 2758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:09.851422', 'step': 2758, 'epoch': 1} {'type': 'loss', 'content': 0.15840493142604828, 'timestamp': '2025-09-10 02:36:09.853500', 'step': 2759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:09.907088', 'step': 2759, 'epoch': 1} {'type': 'loss', 'content': 0.10673629492521286, 'timestamp': '2025-09-10 02:36:09.912793', 'step': 2760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:09.966573', 'step': 2760, 'epoch': 1} {'type': 'loss', 'content': 0.14487287402153015, 'timestamp': '2025-09-10 02:36:09.968257', 'step': 2761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:10.022297', 'step': 2761, 'epoch': 1} {'type': 'loss', 'content': 0.16405007243156433, 'timestamp': '2025-09-10 02:36:10.024172', 'step': 2762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:36:10.077771', 'step': 2762, 'epoch': 1} {'type': 'loss', 'content': 0.21389524638652802, 'timestamp': '2025-09-10 02:36:10.079609', 'step': 2763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:10.133389', 'step': 2763, 'epoch': 1} {'type': 'loss', 'content': 0.19412200152873993, 'timestamp': '2025-09-10 02:36:10.139689', 'step': 2764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:10.193046', 'step': 2764, 'epoch': 1} {'type': 'loss', 'content': 0.1621021181344986, 'timestamp': '2025-09-10 02:36:10.195092', 'step': 2765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:10.249530', 'step': 2765, 'epoch': 1} {'type': 'loss', 'content': 0.20623792707920074, 'timestamp': '2025-09-10 02:36:10.251590', 'step': 2766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:10.306976', 'step': 2766, 'epoch': 1} {'type': 'loss', 'content': 0.18699388206005096, 'timestamp': '2025-09-10 02:36:10.309546', 'step': 2767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:10.363523', 'step': 2767, 'epoch': 1} {'type': 'loss', 'content': 0.1764378547668457, 'timestamp': '2025-09-10 02:36:10.369531', 'step': 2768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:10.423026', 'step': 2768, 'epoch': 1} {'type': 'loss', 'content': 0.1464204639196396, 'timestamp': '2025-09-10 02:36:10.424939', 'step': 2769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:10.479705', 'step': 2769, 'epoch': 1} {'type': 'loss', 'content': 0.20298415422439575, 'timestamp': '2025-09-10 02:36:10.481813', 'step': 2770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:10.535647', 'step': 2770, 'epoch': 1} {'type': 'loss', 'content': 0.11866128444671631, 'timestamp': '2025-09-10 02:36:10.537845', 'step': 2771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:10.591596', 'step': 2771, 'epoch': 1} {'type': 'loss', 'content': 0.25946810841560364, 'timestamp': '2025-09-10 02:36:10.597642', 'step': 2772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:10.650485', 'step': 2772, 'epoch': 1} {'type': 'loss', 'content': 0.08495800197124481, 'timestamp': '2025-09-10 02:36:10.652645', 'step': 2773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:10.706633', 'step': 2773, 'epoch': 1} {'type': 'loss', 'content': 0.22275838255882263, 'timestamp': '2025-09-10 02:36:10.708849', 'step': 2774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:10.763238', 'step': 2774, 'epoch': 1} {'type': 'loss', 'content': 0.17911724746227264, 'timestamp': '2025-09-10 02:36:10.765044', 'step': 2775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:10.820036', 'step': 2775, 'epoch': 1} {'type': 'loss', 'content': 0.18099819123744965, 'timestamp': '2025-09-10 02:36:10.826010', 'step': 2776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:10.879914', 'step': 2776, 'epoch': 1} {'type': 'loss', 'content': 0.14729394018650055, 'timestamp': '2025-09-10 02:36:10.881713', 'step': 2777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:10.936254', 'step': 2777, 'epoch': 1} {'type': 'loss', 'content': 0.13842085003852844, 'timestamp': '2025-09-10 02:36:10.938133', 'step': 2778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:10.993033', 'step': 2778, 'epoch': 1} {'type': 'loss', 'content': 0.11880127340555191, 'timestamp': '2025-09-10 02:36:10.995219', 'step': 2779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:11.049046', 'step': 2779, 'epoch': 1} {'type': 'loss', 'content': 0.2576591968536377, 'timestamp': '2025-09-10 02:36:11.055283', 'step': 2780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:11.109710', 'step': 2780, 'epoch': 1} {'type': 'loss', 'content': 0.16549788415431976, 'timestamp': '2025-09-10 02:36:11.111944', 'step': 2781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:11.165808', 'step': 2781, 'epoch': 1} {'type': 'loss', 'content': 0.15091457962989807, 'timestamp': '2025-09-10 02:36:11.167844', 'step': 2782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:11.223236', 'step': 2782, 'epoch': 1} {'type': 'loss', 'content': 0.21126540005207062, 'timestamp': '2025-09-10 02:36:11.225301', 'step': 2783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:11.280697', 'step': 2783, 'epoch': 1} {'type': 'loss', 'content': 0.22520625591278076, 'timestamp': '2025-09-10 02:36:11.286921', 'step': 2784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:11.341424', 'step': 2784, 'epoch': 1} {'type': 'loss', 'content': 0.09620023518800735, 'timestamp': '2025-09-10 02:36:11.343696', 'step': 2785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:11.397447', 'step': 2785, 'epoch': 1} {'type': 'loss', 'content': 0.2612887918949127, 'timestamp': '2025-09-10 02:36:11.399474', 'step': 2786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:11.454180', 'step': 2786, 'epoch': 1} {'type': 'loss', 'content': 0.189510777592659, 'timestamp': '2025-09-10 02:36:11.456101', 'step': 2787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:11.509576', 'step': 2787, 'epoch': 1} {'type': 'loss', 'content': 0.18685831129550934, 'timestamp': '2025-09-10 02:36:11.515756', 'step': 2788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:11.570006', 'step': 2788, 'epoch': 1} {'type': 'loss', 'content': 0.13787662982940674, 'timestamp': '2025-09-10 02:36:11.572326', 'step': 2789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:11.627024', 'step': 2789, 'epoch': 1} {'type': 'loss', 'content': 0.2151784896850586, 'timestamp': '2025-09-10 02:36:11.629177', 'step': 2790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:11.682838', 'step': 2790, 'epoch': 1} {'type': 'loss', 'content': 0.14007128775119781, 'timestamp': '2025-09-10 02:36:11.684861', 'step': 2791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:11.738526', 'step': 2791, 'epoch': 1} {'type': 'loss', 'content': 0.351874977350235, 'timestamp': '2025-09-10 02:36:11.744618', 'step': 2792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:11.798026', 'step': 2792, 'epoch': 1} {'type': 'loss', 'content': 0.14279471337795258, 'timestamp': '2025-09-10 02:36:11.800114', 'step': 2793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:11.854183', 'step': 2793, 'epoch': 1} {'type': 'loss', 'content': 0.17775557935237885, 'timestamp': '2025-09-10 02:36:11.856373', 'step': 2794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:11.911644', 'step': 2794, 'epoch': 1} {'type': 'loss', 'content': 0.16863395273685455, 'timestamp': '2025-09-10 02:36:11.913745', 'step': 2795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:11.968077', 'step': 2795, 'epoch': 1} {'type': 'loss', 'content': 0.15574267506599426, 'timestamp': '2025-09-10 02:36:11.974459', 'step': 2796, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:36:24.769898', 'step': 2796, 'epoch': 1} {'type': 'pplx', 'content': 11973.270559878305, 'timestamp': '2025-09-10 02:36:24.773102', 'step': 2796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:24.827081', 'step': 2796, 'epoch': 1} {'type': 'loss', 'content': 0.2670155167579651, 'timestamp': '2025-09-10 02:36:24.829230', 'step': 2797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:24.884201', 'step': 2797, 'epoch': 1} {'type': 'loss', 'content': 0.20459720492362976, 'timestamp': '2025-09-10 02:36:24.886378', 'step': 2798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:24.940663', 'step': 2798, 'epoch': 1} {'type': 'loss', 'content': 0.11216826736927032, 'timestamp': '2025-09-10 02:36:24.942732', 'step': 2799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:25.001166', 'step': 2799, 'epoch': 1} {'type': 'loss', 'content': 0.16396617889404297, 'timestamp': '2025-09-10 02:36:25.007393', 'step': 2800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:36:25.060894', 'step': 2800, 'epoch': 1} {'type': 'loss', 'content': 0.11966178566217422, 'timestamp': '2025-09-10 02:36:25.063065', 'step': 2801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:25.116725', 'step': 2801, 'epoch': 1} {'type': 'loss', 'content': 0.14827623963356018, 'timestamp': '2025-09-10 02:36:25.118883', 'step': 2802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:36:25.172340', 'step': 2802, 'epoch': 1} {'type': 'loss', 'content': 0.1184634268283844, 'timestamp': '2025-09-10 02:36:25.174485', 'step': 2803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:25.228938', 'step': 2803, 'epoch': 1} {'type': 'loss', 'content': 0.1860639899969101, 'timestamp': '2025-09-10 02:36:25.235089', 'step': 2804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:25.288549', 'step': 2804, 'epoch': 1} {'type': 'loss', 'content': 0.1816771775484085, 'timestamp': '2025-09-10 02:36:25.290593', 'step': 2805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:25.344660', 'step': 2805, 'epoch': 1} {'type': 'loss', 'content': 0.12789948284626007, 'timestamp': '2025-09-10 02:36:25.346764', 'step': 2806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:25.400797', 'step': 2806, 'epoch': 1} {'type': 'loss', 'content': 0.22669832408428192, 'timestamp': '2025-09-10 02:36:25.402969', 'step': 2807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:25.456757', 'step': 2807, 'epoch': 1} {'type': 'loss', 'content': 0.21699050068855286, 'timestamp': '2025-09-10 02:36:25.462949', 'step': 2808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:25.517196', 'step': 2808, 'epoch': 1} {'type': 'loss', 'content': 0.4320470094680786, 'timestamp': '2025-09-10 02:36:25.519327', 'step': 2809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:25.575292', 'step': 2809, 'epoch': 1} {'type': 'loss', 'content': 0.1554960310459137, 'timestamp': '2025-09-10 02:36:25.577351', 'step': 2810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:25.631858', 'step': 2810, 'epoch': 1} {'type': 'loss', 'content': 0.1440795660018921, 'timestamp': '2025-09-10 02:36:25.634159', 'step': 2811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:25.688970', 'step': 2811, 'epoch': 1} {'type': 'loss', 'content': 0.11923769861459732, 'timestamp': '2025-09-10 02:36:25.694970', 'step': 2812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:25.748449', 'step': 2812, 'epoch': 1} {'type': 'loss', 'content': 0.19235508143901825, 'timestamp': '2025-09-10 02:36:25.750617', 'step': 2813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:25.806640', 'step': 2813, 'epoch': 1} {'type': 'loss', 'content': 0.15185244381427765, 'timestamp': '2025-09-10 02:36:25.808915', 'step': 2814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:25.863719', 'step': 2814, 'epoch': 1} {'type': 'loss', 'content': 0.20346496999263763, 'timestamp': '2025-09-10 02:36:25.866099', 'step': 2815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:25.921163', 'step': 2815, 'epoch': 1} {'type': 'loss', 'content': 0.18927329778671265, 'timestamp': '2025-09-10 02:36:25.927435', 'step': 2816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:25.980651', 'step': 2816, 'epoch': 1} {'type': 'loss', 'content': 0.13354623317718506, 'timestamp': '2025-09-10 02:36:25.982821', 'step': 2817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:26.036453', 'step': 2817, 'epoch': 1} {'type': 'loss', 'content': 0.13681653141975403, 'timestamp': '2025-09-10 02:36:26.038623', 'step': 2818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:26.095227', 'step': 2818, 'epoch': 1} {'type': 'loss', 'content': 0.3003681004047394, 'timestamp': '2025-09-10 02:36:26.097448', 'step': 2819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:26.151993', 'step': 2819, 'epoch': 1} {'type': 'loss', 'content': 0.19594693183898926, 'timestamp': '2025-09-10 02:36:26.158160', 'step': 2820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:26.211615', 'step': 2820, 'epoch': 1} {'type': 'loss', 'content': 0.1739855855703354, 'timestamp': '2025-09-10 02:36:26.213867', 'step': 2821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:26.267516', 'step': 2821, 'epoch': 1} {'type': 'loss', 'content': 0.2538778781890869, 'timestamp': '2025-09-10 02:36:26.269803', 'step': 2822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:26.324372', 'step': 2822, 'epoch': 1} {'type': 'loss', 'content': 0.20374040305614471, 'timestamp': '2025-09-10 02:36:26.326222', 'step': 2823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:26.380564', 'step': 2823, 'epoch': 1} {'type': 'loss', 'content': 0.09136465191841125, 'timestamp': '2025-09-10 02:36:26.386096', 'step': 2824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:26.439202', 'step': 2824, 'epoch': 1} {'type': 'loss', 'content': 0.14761976897716522, 'timestamp': '2025-09-10 02:36:26.441310', 'step': 2825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:26.495929', 'step': 2825, 'epoch': 1} {'type': 'loss', 'content': 0.17447970807552338, 'timestamp': '2025-09-10 02:36:26.498110', 'step': 2826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:26.554568', 'step': 2826, 'epoch': 1} {'type': 'loss', 'content': 0.14853432774543762, 'timestamp': '2025-09-10 02:36:26.556676', 'step': 2827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:26.611203', 'step': 2827, 'epoch': 1} {'type': 'loss', 'content': 0.08769051730632782, 'timestamp': '2025-09-10 02:36:26.617623', 'step': 2828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:26.671646', 'step': 2828, 'epoch': 1} {'type': 'loss', 'content': 0.10307889431715012, 'timestamp': '2025-09-10 02:36:26.673799', 'step': 2829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:26.728293', 'step': 2829, 'epoch': 1} {'type': 'loss', 'content': 0.276154488325119, 'timestamp': '2025-09-10 02:36:26.730474', 'step': 2830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:26.786197', 'step': 2830, 'epoch': 1} {'type': 'loss', 'content': 0.1990875005722046, 'timestamp': '2025-09-10 02:36:26.788364', 'step': 2831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:26.843304', 'step': 2831, 'epoch': 1} {'type': 'loss', 'content': 0.1649939864873886, 'timestamp': '2025-09-10 02:36:26.849552', 'step': 2832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:26.903304', 'step': 2832, 'epoch': 1} {'type': 'loss', 'content': 0.11250919848680496, 'timestamp': '2025-09-10 02:36:26.905392', 'step': 2833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:26.961083', 'step': 2833, 'epoch': 1} {'type': 'loss', 'content': 0.13852712512016296, 'timestamp': '2025-09-10 02:36:26.963374', 'step': 2834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:27.018692', 'step': 2834, 'epoch': 1} {'type': 'loss', 'content': 0.13207556307315826, 'timestamp': '2025-09-10 02:36:27.020942', 'step': 2835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:27.079001', 'step': 2835, 'epoch': 1} {'type': 'loss', 'content': 0.1923944354057312, 'timestamp': '2025-09-10 02:36:27.085838', 'step': 2836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:27.143662', 'step': 2836, 'epoch': 1} {'type': 'loss', 'content': 0.22255104780197144, 'timestamp': '2025-09-10 02:36:27.145985', 'step': 2837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:27.203303', 'step': 2837, 'epoch': 1} {'type': 'loss', 'content': 0.20617729425430298, 'timestamp': '2025-09-10 02:36:27.205430', 'step': 2838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:27.262139', 'step': 2838, 'epoch': 1} {'type': 'loss', 'content': 0.15967914462089539, 'timestamp': '2025-09-10 02:36:27.264283', 'step': 2839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:27.319390', 'step': 2839, 'epoch': 1} {'type': 'loss', 'content': 0.18150579929351807, 'timestamp': '2025-09-10 02:36:27.325744', 'step': 2840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:27.380111', 'step': 2840, 'epoch': 1} {'type': 'loss', 'content': 0.30979931354522705, 'timestamp': '2025-09-10 02:36:27.382313', 'step': 2841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:27.437310', 'step': 2841, 'epoch': 1} {'type': 'loss', 'content': 0.15207429230213165, 'timestamp': '2025-09-10 02:36:27.439495', 'step': 2842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:27.493819', 'step': 2842, 'epoch': 1} {'type': 'loss', 'content': 0.1430385410785675, 'timestamp': '2025-09-10 02:36:27.496173', 'step': 2843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:27.550129', 'step': 2843, 'epoch': 1} {'type': 'loss', 'content': 0.14648066461086273, 'timestamp': '2025-09-10 02:36:27.556361', 'step': 2844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:27.611745', 'step': 2844, 'epoch': 1} {'type': 'loss', 'content': 0.19942951202392578, 'timestamp': '2025-09-10 02:36:27.613940', 'step': 2845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:27.670472', 'step': 2845, 'epoch': 1} {'type': 'loss', 'content': 0.1700279414653778, 'timestamp': '2025-09-10 02:36:27.672484', 'step': 2846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:27.729223', 'step': 2846, 'epoch': 1} {'type': 'loss', 'content': 0.17582783102989197, 'timestamp': '2025-09-10 02:36:27.731531', 'step': 2847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:27.785733', 'step': 2847, 'epoch': 1} {'type': 'loss', 'content': 0.13437214493751526, 'timestamp': '2025-09-10 02:36:27.792065', 'step': 2848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:27.846802', 'step': 2848, 'epoch': 1} {'type': 'loss', 'content': 0.12037038803100586, 'timestamp': '2025-09-10 02:36:27.849043', 'step': 2849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:27.905291', 'step': 2849, 'epoch': 1} {'type': 'loss', 'content': 0.12229849398136139, 'timestamp': '2025-09-10 02:36:27.907495', 'step': 2850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:27.967867', 'step': 2850, 'epoch': 1} {'type': 'loss', 'content': 0.15102213621139526, 'timestamp': '2025-09-10 02:36:27.970115', 'step': 2851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:28.027798', 'step': 2851, 'epoch': 1} {'type': 'loss', 'content': 0.14836229383945465, 'timestamp': '2025-09-10 02:36:28.034761', 'step': 2852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:28.107280', 'step': 2852, 'epoch': 1} {'type': 'loss', 'content': 0.1453336775302887, 'timestamp': '2025-09-10 02:36:28.109461', 'step': 2853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:28.168346', 'step': 2853, 'epoch': 1} {'type': 'loss', 'content': 0.17946824431419373, 'timestamp': '2025-09-10 02:36:28.170473', 'step': 2854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:28.226250', 'step': 2854, 'epoch': 1} {'type': 'loss', 'content': 0.20720736682415009, 'timestamp': '2025-09-10 02:36:28.228430', 'step': 2855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:28.283423', 'step': 2855, 'epoch': 1} {'type': 'loss', 'content': 0.16341890394687653, 'timestamp': '2025-09-10 02:36:28.289750', 'step': 2856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:28.343271', 'step': 2856, 'epoch': 1} {'type': 'loss', 'content': 0.22375984489917755, 'timestamp': '2025-09-10 02:36:28.345561', 'step': 2857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:28.399606', 'step': 2857, 'epoch': 1} {'type': 'loss', 'content': 0.13761352002620697, 'timestamp': '2025-09-10 02:36:28.401919', 'step': 2858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:28.457937', 'step': 2858, 'epoch': 1} {'type': 'loss', 'content': 0.298330157995224, 'timestamp': '2025-09-10 02:36:28.460102', 'step': 2859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:28.515949', 'step': 2859, 'epoch': 1} {'type': 'loss', 'content': 0.1964179277420044, 'timestamp': '2025-09-10 02:36:28.522096', 'step': 2860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:28.577304', 'step': 2860, 'epoch': 1} {'type': 'loss', 'content': 0.1653108298778534, 'timestamp': '2025-09-10 02:36:28.579103', 'step': 2861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:28.632653', 'step': 2861, 'epoch': 1} {'type': 'loss', 'content': 0.19439205527305603, 'timestamp': '2025-09-10 02:36:28.635059', 'step': 2862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:28.689279', 'step': 2862, 'epoch': 1} {'type': 'loss', 'content': 0.07148803025484085, 'timestamp': '2025-09-10 02:36:28.691471', 'step': 2863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:28.746576', 'step': 2863, 'epoch': 1} {'type': 'loss', 'content': 0.12646527588367462, 'timestamp': '2025-09-10 02:36:28.752974', 'step': 2864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:28.810446', 'step': 2864, 'epoch': 1} {'type': 'loss', 'content': 0.2199854701757431, 'timestamp': '2025-09-10 02:36:28.812632', 'step': 2865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:28.869521', 'step': 2865, 'epoch': 1} {'type': 'loss', 'content': 0.22477932274341583, 'timestamp': '2025-09-10 02:36:28.871704', 'step': 2866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:28.926963', 'step': 2866, 'epoch': 1} {'type': 'loss', 'content': 0.19949643313884735, 'timestamp': '2025-09-10 02:36:28.929085', 'step': 2867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:28.985615', 'step': 2867, 'epoch': 1} {'type': 'loss', 'content': 0.1557602882385254, 'timestamp': '2025-09-10 02:36:28.991772', 'step': 2868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:29.046918', 'step': 2868, 'epoch': 1} {'type': 'loss', 'content': 0.19951848685741425, 'timestamp': '2025-09-10 02:36:29.049060', 'step': 2869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:29.108283', 'step': 2869, 'epoch': 1} {'type': 'loss', 'content': 0.13253062963485718, 'timestamp': '2025-09-10 02:36:29.110423', 'step': 2870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:29.166548', 'step': 2870, 'epoch': 1} {'type': 'loss', 'content': 0.19064322113990784, 'timestamp': '2025-09-10 02:36:29.168824', 'step': 2871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:29.227283', 'step': 2871, 'epoch': 1} {'type': 'loss', 'content': 0.12730325758457184, 'timestamp': '2025-09-10 02:36:29.234294', 'step': 2872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:29.293693', 'step': 2872, 'epoch': 1} {'type': 'loss', 'content': 0.24491745233535767, 'timestamp': '2025-09-10 02:36:29.296055', 'step': 2873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:29.354371', 'step': 2873, 'epoch': 1} {'type': 'loss', 'content': 0.09138915687799454, 'timestamp': '2025-09-10 02:36:29.356648', 'step': 2874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:36:29.414216', 'step': 2874, 'epoch': 1} {'type': 'loss', 'content': 0.15415450930595398, 'timestamp': '2025-09-10 02:36:29.416464', 'step': 2875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:29.474077', 'step': 2875, 'epoch': 1} {'type': 'loss', 'content': 0.20951606333255768, 'timestamp': '2025-09-10 02:36:29.480772', 'step': 2876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:29.537506', 'step': 2876, 'epoch': 1} {'type': 'loss', 'content': 0.22685478627681732, 'timestamp': '2025-09-10 02:36:29.539758', 'step': 2877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:29.598007', 'step': 2877, 'epoch': 1} {'type': 'loss', 'content': 0.17901034653186798, 'timestamp': '2025-09-10 02:36:29.600247', 'step': 2878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:29.663664', 'step': 2878, 'epoch': 1} {'type': 'loss', 'content': 0.16533170640468597, 'timestamp': '2025-09-10 02:36:29.666173', 'step': 2879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:29.729275', 'step': 2879, 'epoch': 1} {'type': 'loss', 'content': 0.1594337522983551, 'timestamp': '2025-09-10 02:36:29.736647', 'step': 2880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:36:29.795695', 'step': 2880, 'epoch': 1} {'type': 'loss', 'content': 0.2229284644126892, 'timestamp': '2025-09-10 02:36:29.797814', 'step': 2881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:29.853814', 'step': 2881, 'epoch': 1} {'type': 'loss', 'content': 0.1971074640750885, 'timestamp': '2025-09-10 02:36:29.856071', 'step': 2882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:29.910656', 'step': 2882, 'epoch': 1} {'type': 'loss', 'content': 0.16638922691345215, 'timestamp': '2025-09-10 02:36:29.912827', 'step': 2883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:29.966849', 'step': 2883, 'epoch': 1} {'type': 'loss', 'content': 0.24575527012348175, 'timestamp': '2025-09-10 02:36:29.973006', 'step': 2884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:30.026148', 'step': 2884, 'epoch': 1} {'type': 'loss', 'content': 0.1206524446606636, 'timestamp': '2025-09-10 02:36:30.028411', 'step': 2885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:30.082347', 'step': 2885, 'epoch': 1} {'type': 'loss', 'content': 0.09679830074310303, 'timestamp': '2025-09-10 02:36:30.084581', 'step': 2886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:30.138912', 'step': 2886, 'epoch': 1} {'type': 'loss', 'content': 0.16993659734725952, 'timestamp': '2025-09-10 02:36:30.141154', 'step': 2887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:30.196096', 'step': 2887, 'epoch': 1} {'type': 'loss', 'content': 0.12762154638767242, 'timestamp': '2025-09-10 02:36:30.202221', 'step': 2888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:30.255406', 'step': 2888, 'epoch': 1} {'type': 'loss', 'content': 0.180310919880867, 'timestamp': '2025-09-10 02:36:30.257677', 'step': 2889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:30.311644', 'step': 2889, 'epoch': 1} {'type': 'loss', 'content': 0.15127356350421906, 'timestamp': '2025-09-10 02:36:30.313833', 'step': 2890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:30.367497', 'step': 2890, 'epoch': 1} {'type': 'loss', 'content': 0.12318529933691025, 'timestamp': '2025-09-10 02:36:30.369664', 'step': 2891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:36:30.423738', 'step': 2891, 'epoch': 1} {'type': 'loss', 'content': 0.10732384026050568, 'timestamp': '2025-09-10 02:36:30.429950', 'step': 2892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:30.484215', 'step': 2892, 'epoch': 1} {'type': 'loss', 'content': 0.14400357007980347, 'timestamp': '2025-09-10 02:36:30.486462', 'step': 2893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:30.542368', 'step': 2893, 'epoch': 1} {'type': 'loss', 'content': 0.1646861582994461, 'timestamp': '2025-09-10 02:36:30.544555', 'step': 2894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:30.600331', 'step': 2894, 'epoch': 1} {'type': 'loss', 'content': 0.12342645227909088, 'timestamp': '2025-09-10 02:36:30.602492', 'step': 2895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:30.660058', 'step': 2895, 'epoch': 1} {'type': 'loss', 'content': 0.17756076157093048, 'timestamp': '2025-09-10 02:36:30.666945', 'step': 2896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:30.722948', 'step': 2896, 'epoch': 1} {'type': 'loss', 'content': 0.104106105864048, 'timestamp': '2025-09-10 02:36:30.725451', 'step': 2897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:30.781648', 'step': 2897, 'epoch': 1} {'type': 'loss', 'content': 0.18148180842399597, 'timestamp': '2025-09-10 02:36:30.783786', 'step': 2898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:30.839212', 'step': 2898, 'epoch': 1} {'type': 'loss', 'content': 0.22532007098197937, 'timestamp': '2025-09-10 02:36:30.841366', 'step': 2899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:30.896427', 'step': 2899, 'epoch': 1} {'type': 'loss', 'content': 0.19476810097694397, 'timestamp': '2025-09-10 02:36:30.902623', 'step': 2900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:30.957347', 'step': 2900, 'epoch': 1} {'type': 'loss', 'content': 0.11913280189037323, 'timestamp': '2025-09-10 02:36:30.959601', 'step': 2901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:31.015818', 'step': 2901, 'epoch': 1} {'type': 'loss', 'content': 0.15460295975208282, 'timestamp': '2025-09-10 02:36:31.018151', 'step': 2902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:31.073281', 'step': 2902, 'epoch': 1} {'type': 'loss', 'content': 0.23104910552501678, 'timestamp': '2025-09-10 02:36:31.075443', 'step': 2903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:31.132229', 'step': 2903, 'epoch': 1} {'type': 'loss', 'content': 0.09848915785551071, 'timestamp': '2025-09-10 02:36:31.138835', 'step': 2904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:31.196137', 'step': 2904, 'epoch': 1} {'type': 'loss', 'content': 0.19420526921749115, 'timestamp': '2025-09-10 02:36:31.198303', 'step': 2905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:31.254490', 'step': 2905, 'epoch': 1} {'type': 'loss', 'content': 0.16816389560699463, 'timestamp': '2025-09-10 02:36:31.256589', 'step': 2906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:31.312405', 'step': 2906, 'epoch': 1} {'type': 'loss', 'content': 0.12310636043548584, 'timestamp': '2025-09-10 02:36:31.314728', 'step': 2907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:31.372172', 'step': 2907, 'epoch': 1} {'type': 'loss', 'content': 0.20436552166938782, 'timestamp': '2025-09-10 02:36:31.378794', 'step': 2908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:31.433857', 'step': 2908, 'epoch': 1} {'type': 'loss', 'content': 0.22664722800254822, 'timestamp': '2025-09-10 02:36:31.436058', 'step': 2909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:31.490234', 'step': 2909, 'epoch': 1} {'type': 'loss', 'content': 0.09974431246519089, 'timestamp': '2025-09-10 02:36:31.492451', 'step': 2910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:31.546111', 'step': 2910, 'epoch': 1} {'type': 'loss', 'content': 0.15003371238708496, 'timestamp': '2025-09-10 02:36:31.548202', 'step': 2911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:31.609370', 'step': 2911, 'epoch': 1} {'type': 'loss', 'content': 0.20721128582954407, 'timestamp': '2025-09-10 02:36:31.615442', 'step': 2912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:31.677770', 'step': 2912, 'epoch': 1} {'type': 'loss', 'content': 0.16738560795783997, 'timestamp': '2025-09-10 02:36:31.680080', 'step': 2913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:31.734380', 'step': 2913, 'epoch': 1} {'type': 'loss', 'content': 0.1962655633687973, 'timestamp': '2025-09-10 02:36:31.736552', 'step': 2914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:31.790112', 'step': 2914, 'epoch': 1} {'type': 'loss', 'content': 0.18128322064876556, 'timestamp': '2025-09-10 02:36:31.792448', 'step': 2915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:31.848461', 'step': 2915, 'epoch': 1} {'type': 'loss', 'content': 0.17769120633602142, 'timestamp': '2025-09-10 02:36:31.854425', 'step': 2916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:31.907177', 'step': 2916, 'epoch': 1} {'type': 'loss', 'content': 0.1583692878484726, 'timestamp': '2025-09-10 02:36:31.914784', 'step': 2917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:31.968482', 'step': 2917, 'epoch': 1} {'type': 'loss', 'content': 0.19743739068508148, 'timestamp': '2025-09-10 02:36:31.970500', 'step': 2918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:32.025365', 'step': 2918, 'epoch': 1} {'type': 'loss', 'content': 0.17422834038734436, 'timestamp': '2025-09-10 02:36:32.027439', 'step': 2919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:32.080203', 'step': 2919, 'epoch': 1} {'type': 'loss', 'content': 0.20502454042434692, 'timestamp': '2025-09-10 02:36:32.085954', 'step': 2920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:32.138866', 'step': 2920, 'epoch': 1} {'type': 'loss', 'content': 0.17550313472747803, 'timestamp': '2025-09-10 02:36:32.140862', 'step': 2921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:32.196186', 'step': 2921, 'epoch': 1} {'type': 'loss', 'content': 0.15589794516563416, 'timestamp': '2025-09-10 02:36:32.200616', 'step': 2922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:32.254277', 'step': 2922, 'epoch': 1} {'type': 'loss', 'content': 0.231941357254982, 'timestamp': '2025-09-10 02:36:32.256548', 'step': 2923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:36:32.310034', 'step': 2923, 'epoch': 1} {'type': 'loss', 'content': 0.18105286359786987, 'timestamp': '2025-09-10 02:36:32.316952', 'step': 2924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:32.377941', 'step': 2924, 'epoch': 1} {'type': 'loss', 'content': 0.23122544586658478, 'timestamp': '2025-09-10 02:36:32.380109', 'step': 2925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:32.434235', 'step': 2925, 'epoch': 1} {'type': 'loss', 'content': 0.19158636033535004, 'timestamp': '2025-09-10 02:36:32.436369', 'step': 2926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:32.490743', 'step': 2926, 'epoch': 1} {'type': 'loss', 'content': 0.10964012145996094, 'timestamp': '2025-09-10 02:36:32.492922', 'step': 2927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:32.546935', 'step': 2927, 'epoch': 1} {'type': 'loss', 'content': 0.1933416724205017, 'timestamp': '2025-09-10 02:36:32.552931', 'step': 2928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:32.606171', 'step': 2928, 'epoch': 1} {'type': 'loss', 'content': 0.23032931983470917, 'timestamp': '2025-09-10 02:36:32.608491', 'step': 2929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:32.661766', 'step': 2929, 'epoch': 1} {'type': 'loss', 'content': 0.18215225636959076, 'timestamp': '2025-09-10 02:36:32.663928', 'step': 2930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:32.718115', 'step': 2930, 'epoch': 1} {'type': 'loss', 'content': 0.1550116240978241, 'timestamp': '2025-09-10 02:36:32.720196', 'step': 2931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:32.773812', 'step': 2931, 'epoch': 1} {'type': 'loss', 'content': 0.206797257065773, 'timestamp': '2025-09-10 02:36:32.779851', 'step': 2932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:32.834709', 'step': 2932, 'epoch': 1} {'type': 'loss', 'content': 0.1840030997991562, 'timestamp': '2025-09-10 02:36:32.836570', 'step': 2933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:32.890059', 'step': 2933, 'epoch': 1} {'type': 'loss', 'content': 0.24652604758739471, 'timestamp': '2025-09-10 02:36:32.892197', 'step': 2934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:32.946141', 'step': 2934, 'epoch': 1} {'type': 'loss', 'content': 0.12955842912197113, 'timestamp': '2025-09-10 02:36:32.948369', 'step': 2935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:33.001968', 'step': 2935, 'epoch': 1} {'type': 'loss', 'content': 0.23300713300704956, 'timestamp': '2025-09-10 02:36:33.007834', 'step': 2936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:33.060854', 'step': 2936, 'epoch': 1} {'type': 'loss', 'content': 0.18651092052459717, 'timestamp': '2025-09-10 02:36:33.063056', 'step': 2937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:33.117936', 'step': 2937, 'epoch': 1} {'type': 'loss', 'content': 0.23432813584804535, 'timestamp': '2025-09-10 02:36:33.120198', 'step': 2938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:33.174215', 'step': 2938, 'epoch': 1} {'type': 'loss', 'content': 0.1808948814868927, 'timestamp': '2025-09-10 02:36:33.176466', 'step': 2939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:33.232829', 'step': 2939, 'epoch': 1} {'type': 'loss', 'content': 0.2480267882347107, 'timestamp': '2025-09-10 02:36:33.241425', 'step': 2940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:33.294903', 'step': 2940, 'epoch': 1} {'type': 'loss', 'content': 0.1946748048067093, 'timestamp': '2025-09-10 02:36:33.297267', 'step': 2941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:33.351012', 'step': 2941, 'epoch': 1} {'type': 'loss', 'content': 0.13363279402256012, 'timestamp': '2025-09-10 02:36:33.359787', 'step': 2942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:33.420101', 'step': 2942, 'epoch': 1} {'type': 'loss', 'content': 0.1467665731906891, 'timestamp': '2025-09-10 02:36:33.428570', 'step': 2943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:33.484085', 'step': 2943, 'epoch': 1} {'type': 'loss', 'content': 0.09369921684265137, 'timestamp': '2025-09-10 02:36:33.490298', 'step': 2944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:33.554757', 'step': 2944, 'epoch': 1} {'type': 'loss', 'content': 0.1518579125404358, 'timestamp': '2025-09-10 02:36:33.563651', 'step': 2945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:33.619877', 'step': 2945, 'epoch': 1} {'type': 'loss', 'content': 0.14629580080509186, 'timestamp': '2025-09-10 02:36:33.623180', 'step': 2946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:33.689224', 'step': 2946, 'epoch': 1} {'type': 'loss', 'content': 0.17526677250862122, 'timestamp': '2025-09-10 02:36:33.691342', 'step': 2947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:33.763051', 'step': 2947, 'epoch': 1} {'type': 'loss', 'content': 0.1322796493768692, 'timestamp': '2025-09-10 02:36:33.768942', 'step': 2948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:33.829725', 'step': 2948, 'epoch': 1} {'type': 'loss', 'content': 0.10487530380487442, 'timestamp': '2025-09-10 02:36:33.836719', 'step': 2949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:33.899731', 'step': 2949, 'epoch': 1} {'type': 'loss', 'content': 0.14293450117111206, 'timestamp': '2025-09-10 02:36:33.902059', 'step': 2950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:33.957223', 'step': 2950, 'epoch': 1} {'type': 'loss', 'content': 0.1863878220319748, 'timestamp': '2025-09-10 02:36:33.959317', 'step': 2951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:34.014540', 'step': 2951, 'epoch': 1} {'type': 'loss', 'content': 0.22249102592468262, 'timestamp': '2025-09-10 02:36:34.020771', 'step': 2952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:34.085010', 'step': 2952, 'epoch': 1} {'type': 'loss', 'content': 0.17168574035167694, 'timestamp': '2025-09-10 02:36:34.087125', 'step': 2953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:34.153680', 'step': 2953, 'epoch': 1} {'type': 'loss', 'content': 0.16530181467533112, 'timestamp': '2025-09-10 02:36:34.155781', 'step': 2954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:34.210361', 'step': 2954, 'epoch': 1} {'type': 'loss', 'content': 0.1735907644033432, 'timestamp': '2025-09-10 02:36:34.212546', 'step': 2955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:34.266623', 'step': 2955, 'epoch': 1} {'type': 'loss', 'content': 0.1081494465470314, 'timestamp': '2025-09-10 02:36:34.272611', 'step': 2956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:34.326962', 'step': 2956, 'epoch': 1} {'type': 'loss', 'content': 0.18338653445243835, 'timestamp': '2025-09-10 02:36:34.329085', 'step': 2957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:34.384826', 'step': 2957, 'epoch': 1} {'type': 'loss', 'content': 0.25108802318573, 'timestamp': '2025-09-10 02:36:34.391420', 'step': 2958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:34.446035', 'step': 2958, 'epoch': 1} {'type': 'loss', 'content': 0.2069496065378189, 'timestamp': '2025-09-10 02:36:34.449374', 'step': 2959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:34.504408', 'step': 2959, 'epoch': 1} {'type': 'loss', 'content': 0.1733533889055252, 'timestamp': '2025-09-10 02:36:34.510329', 'step': 2960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:34.565340', 'step': 2960, 'epoch': 1} {'type': 'loss', 'content': 0.17971940338611603, 'timestamp': '2025-09-10 02:36:34.567623', 'step': 2961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:34.622087', 'step': 2961, 'epoch': 1} {'type': 'loss', 'content': 0.1738920658826828, 'timestamp': '2025-09-10 02:36:34.624208', 'step': 2962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:34.679654', 'step': 2962, 'epoch': 1} {'type': 'loss', 'content': 0.1628146767616272, 'timestamp': '2025-09-10 02:36:34.682032', 'step': 2963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:34.735416', 'step': 2963, 'epoch': 1} {'type': 'loss', 'content': 0.14354875683784485, 'timestamp': '2025-09-10 02:36:34.742742', 'step': 2964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:34.797184', 'step': 2964, 'epoch': 1} {'type': 'loss', 'content': 0.2051582932472229, 'timestamp': '2025-09-10 02:36:34.799533', 'step': 2965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:34.857007', 'step': 2965, 'epoch': 1} {'type': 'loss', 'content': 0.22505150735378265, 'timestamp': '2025-09-10 02:36:34.865221', 'step': 2966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:34.921981', 'step': 2966, 'epoch': 1} {'type': 'loss', 'content': 0.1667250245809555, 'timestamp': '2025-09-10 02:36:34.924098', 'step': 2967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:34.977612', 'step': 2967, 'epoch': 1} {'type': 'loss', 'content': 0.11042510718107224, 'timestamp': '2025-09-10 02:36:34.983818', 'step': 2968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:35.036669', 'step': 2968, 'epoch': 1} {'type': 'loss', 'content': 0.15688951313495636, 'timestamp': '2025-09-10 02:36:35.038728', 'step': 2969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:35.096115', 'step': 2969, 'epoch': 1} {'type': 'loss', 'content': 0.1416301727294922, 'timestamp': '2025-09-10 02:36:35.100164', 'step': 2970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:35.158223', 'step': 2970, 'epoch': 1} {'type': 'loss', 'content': 0.22381305694580078, 'timestamp': '2025-09-10 02:36:35.164359', 'step': 2971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:35.219586', 'step': 2971, 'epoch': 1} {'type': 'loss', 'content': 0.20226944983005524, 'timestamp': '2025-09-10 02:36:35.226276', 'step': 2972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:35.280718', 'step': 2972, 'epoch': 1} {'type': 'loss', 'content': 0.14637401700019836, 'timestamp': '2025-09-10 02:36:35.283780', 'step': 2973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:35.343919', 'step': 2973, 'epoch': 1} {'type': 'loss', 'content': 0.1321636140346527, 'timestamp': '2025-09-10 02:36:35.346130', 'step': 2974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:35.401537', 'step': 2974, 'epoch': 1} {'type': 'loss', 'content': 0.1713370382785797, 'timestamp': '2025-09-10 02:36:35.403597', 'step': 2975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:35.458260', 'step': 2975, 'epoch': 1} {'type': 'loss', 'content': 0.13956999778747559, 'timestamp': '2025-09-10 02:36:35.464640', 'step': 2976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:35.519990', 'step': 2976, 'epoch': 1} {'type': 'loss', 'content': 0.16797403991222382, 'timestamp': '2025-09-10 02:36:35.522132', 'step': 2977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:35.578197', 'step': 2977, 'epoch': 1} {'type': 'loss', 'content': 0.14886970818042755, 'timestamp': '2025-09-10 02:36:35.580379', 'step': 2978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:35.636197', 'step': 2978, 'epoch': 1} {'type': 'loss', 'content': 0.1074368804693222, 'timestamp': '2025-09-10 02:36:35.638512', 'step': 2979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:35.695790', 'step': 2979, 'epoch': 1} {'type': 'loss', 'content': 0.18330806493759155, 'timestamp': '2025-09-10 02:36:35.702274', 'step': 2980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:35.757299', 'step': 2980, 'epoch': 1} {'type': 'loss', 'content': 0.1291990578174591, 'timestamp': '2025-09-10 02:36:35.759498', 'step': 2981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:35.814664', 'step': 2981, 'epoch': 1} {'type': 'loss', 'content': 0.190113827586174, 'timestamp': '2025-09-10 02:36:35.816838', 'step': 2982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:35.873655', 'step': 2982, 'epoch': 1} {'type': 'loss', 'content': 0.17413488030433655, 'timestamp': '2025-09-10 02:36:35.875768', 'step': 2983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:35.930720', 'step': 2983, 'epoch': 1} {'type': 'loss', 'content': 0.2705242335796356, 'timestamp': '2025-09-10 02:36:35.937354', 'step': 2984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:35.991802', 'step': 2984, 'epoch': 1} {'type': 'loss', 'content': 0.10946696996688843, 'timestamp': '2025-09-10 02:36:35.994051', 'step': 2985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:36.051459', 'step': 2985, 'epoch': 1} {'type': 'loss', 'content': 0.18225757777690887, 'timestamp': '2025-09-10 02:36:36.053663', 'step': 2986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:36.109678', 'step': 2986, 'epoch': 1} {'type': 'loss', 'content': 0.09974735230207443, 'timestamp': '2025-09-10 02:36:36.111959', 'step': 2987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:36.167430', 'step': 2987, 'epoch': 1} {'type': 'loss', 'content': 0.23435617983341217, 'timestamp': '2025-09-10 02:36:36.174356', 'step': 2988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:36.232100', 'step': 2988, 'epoch': 1} {'type': 'loss', 'content': 0.24803432822227478, 'timestamp': '2025-09-10 02:36:36.235133', 'step': 2989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:36.290777', 'step': 2989, 'epoch': 1} {'type': 'loss', 'content': 0.17376142740249634, 'timestamp': '2025-09-10 02:36:36.293073', 'step': 2990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:36.347844', 'step': 2990, 'epoch': 1} {'type': 'loss', 'content': 0.13858279585838318, 'timestamp': '2025-09-10 02:36:36.350012', 'step': 2991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:36.406019', 'step': 2991, 'epoch': 1} {'type': 'loss', 'content': 0.2330418974161148, 'timestamp': '2025-09-10 02:36:36.412301', 'step': 2992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:36.466723', 'step': 2992, 'epoch': 1} {'type': 'loss', 'content': 0.29033002257347107, 'timestamp': '2025-09-10 02:36:36.468831', 'step': 2993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:36.524084', 'step': 2993, 'epoch': 1} {'type': 'loss', 'content': 0.2500680983066559, 'timestamp': '2025-09-10 02:36:36.526290', 'step': 2994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:36.581311', 'step': 2994, 'epoch': 1} {'type': 'loss', 'content': 0.23868829011917114, 'timestamp': '2025-09-10 02:36:36.583528', 'step': 2995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:36.638653', 'step': 2995, 'epoch': 1} {'type': 'loss', 'content': 0.1599595695734024, 'timestamp': '2025-09-10 02:36:36.644967', 'step': 2996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:36.699552', 'step': 2996, 'epoch': 1} {'type': 'loss', 'content': 0.1846311092376709, 'timestamp': '2025-09-10 02:36:36.701999', 'step': 2997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:36.758656', 'step': 2997, 'epoch': 1} {'type': 'loss', 'content': 0.1488671898841858, 'timestamp': '2025-09-10 02:36:36.760844', 'step': 2998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:36.816779', 'step': 2998, 'epoch': 1} {'type': 'loss', 'content': 0.13588593900203705, 'timestamp': '2025-09-10 02:36:36.818849', 'step': 2999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:36.875470', 'step': 2999, 'epoch': 1} {'type': 'loss', 'content': 0.13790816068649292, 'timestamp': '2025-09-10 02:36:36.881959', 'step': 3000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 3000', 'timestamp': '2025-09-10 02:36:37.289242', 'step': 3000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:37.349840', 'step': 3000, 'epoch': 1} {'type': 'loss', 'content': 0.12560994923114777, 'timestamp': '2025-09-10 02:36:37.352344', 'step': 3001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:37.409363', 'step': 3001, 'epoch': 1} {'type': 'loss', 'content': 0.170742928981781, 'timestamp': '2025-09-10 02:36:37.411514', 'step': 3002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:37.469469', 'step': 3002, 'epoch': 1} {'type': 'loss', 'content': 0.15134099125862122, 'timestamp': '2025-09-10 02:36:37.471607', 'step': 3003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:37.526601', 'step': 3003, 'epoch': 1} {'type': 'loss', 'content': 0.14746037125587463, 'timestamp': '2025-09-10 02:36:37.533263', 'step': 3004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:37.588491', 'step': 3004, 'epoch': 1} {'type': 'loss', 'content': 0.10204731673002243, 'timestamp': '2025-09-10 02:36:37.590635', 'step': 3005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:37.645190', 'step': 3005, 'epoch': 1} {'type': 'loss', 'content': 0.17229260504245758, 'timestamp': '2025-09-10 02:36:37.647315', 'step': 3006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:37.704574', 'step': 3006, 'epoch': 1} {'type': 'loss', 'content': 0.2115374058485031, 'timestamp': '2025-09-10 02:36:37.706833', 'step': 3007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:37.762415', 'step': 3007, 'epoch': 1} {'type': 'loss', 'content': 0.08012882620096207, 'timestamp': '2025-09-10 02:36:37.768848', 'step': 3008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:37.824183', 'step': 3008, 'epoch': 1} {'type': 'loss', 'content': 0.20984961092472076, 'timestamp': '2025-09-10 02:36:37.826331', 'step': 3009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:37.881779', 'step': 3009, 'epoch': 1} {'type': 'loss', 'content': 0.2802469730377197, 'timestamp': '2025-09-10 02:36:37.883914', 'step': 3010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:37.939425', 'step': 3010, 'epoch': 1} {'type': 'loss', 'content': 0.144500270485878, 'timestamp': '2025-09-10 02:36:37.941610', 'step': 3011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:37.996287', 'step': 3011, 'epoch': 1} {'type': 'loss', 'content': 0.09236123412847519, 'timestamp': '2025-09-10 02:36:38.002634', 'step': 3012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:38.057064', 'step': 3012, 'epoch': 1} {'type': 'loss', 'content': 0.15503355860710144, 'timestamp': '2025-09-10 02:36:38.059201', 'step': 3013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:38.115252', 'step': 3013, 'epoch': 1} {'type': 'loss', 'content': 0.11926798522472382, 'timestamp': '2025-09-10 02:36:38.117355', 'step': 3014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:38.172788', 'step': 3014, 'epoch': 1} {'type': 'loss', 'content': 0.15482798218727112, 'timestamp': '2025-09-10 02:36:38.174898', 'step': 3015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:38.232843', 'step': 3015, 'epoch': 1} {'type': 'loss', 'content': 0.2221803367137909, 'timestamp': '2025-09-10 02:36:38.239642', 'step': 3016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:38.294775', 'step': 3016, 'epoch': 1} {'type': 'loss', 'content': 0.20611514151096344, 'timestamp': '2025-09-10 02:36:38.296966', 'step': 3017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:38.352249', 'step': 3017, 'epoch': 1} {'type': 'loss', 'content': 0.21300777792930603, 'timestamp': '2025-09-10 02:36:38.354393', 'step': 3018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:38.409335', 'step': 3018, 'epoch': 1} {'type': 'loss', 'content': 0.11228451132774353, 'timestamp': '2025-09-10 02:36:38.411469', 'step': 3019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:38.466508', 'step': 3019, 'epoch': 1} {'type': 'loss', 'content': 0.16562826931476593, 'timestamp': '2025-09-10 02:36:38.473007', 'step': 3020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:38.527488', 'step': 3020, 'epoch': 1} {'type': 'loss', 'content': 0.14891372621059418, 'timestamp': '2025-09-10 02:36:38.529790', 'step': 3021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:38.584494', 'step': 3021, 'epoch': 1} {'type': 'loss', 'content': 0.12565049529075623, 'timestamp': '2025-09-10 02:36:38.586533', 'step': 3022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:38.642478', 'step': 3022, 'epoch': 1} {'type': 'loss', 'content': 0.20751844346523285, 'timestamp': '2025-09-10 02:36:38.644684', 'step': 3023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:38.699411', 'step': 3023, 'epoch': 1} {'type': 'loss', 'content': 0.1899058073759079, 'timestamp': '2025-09-10 02:36:38.705915', 'step': 3024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:38.760554', 'step': 3024, 'epoch': 1} {'type': 'loss', 'content': 0.17022676765918732, 'timestamp': '2025-09-10 02:36:38.762933', 'step': 3025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:38.821247', 'step': 3025, 'epoch': 1} {'type': 'loss', 'content': 0.08948174864053726, 'timestamp': '2025-09-10 02:36:38.823684', 'step': 3026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:38.881742', 'step': 3026, 'epoch': 1} {'type': 'loss', 'content': 0.21071107685565948, 'timestamp': '2025-09-10 02:36:38.884116', 'step': 3027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:38.941268', 'step': 3027, 'epoch': 1} {'type': 'loss', 'content': 0.22985686361789703, 'timestamp': '2025-09-10 02:36:38.947739', 'step': 3028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:39.003366', 'step': 3028, 'epoch': 1} {'type': 'loss', 'content': 0.13134637475013733, 'timestamp': '2025-09-10 02:36:39.005570', 'step': 3029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:39.060388', 'step': 3029, 'epoch': 1} {'type': 'loss', 'content': 0.18573489785194397, 'timestamp': '2025-09-10 02:36:39.062787', 'step': 3030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:39.117545', 'step': 3030, 'epoch': 1} {'type': 'loss', 'content': 0.1541641652584076, 'timestamp': '2025-09-10 02:36:39.119808', 'step': 3031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:39.175334', 'step': 3031, 'epoch': 1} {'type': 'loss', 'content': 0.1735130399465561, 'timestamp': '2025-09-10 02:36:39.181827', 'step': 3032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:39.236052', 'step': 3032, 'epoch': 1} {'type': 'loss', 'content': 0.1820349097251892, 'timestamp': '2025-09-10 02:36:39.238141', 'step': 3033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:39.292506', 'step': 3033, 'epoch': 1} {'type': 'loss', 'content': 0.18286921083927155, 'timestamp': '2025-09-10 02:36:39.294722', 'step': 3034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:39.350341', 'step': 3034, 'epoch': 1} {'type': 'loss', 'content': 0.14617975056171417, 'timestamp': '2025-09-10 02:36:39.352505', 'step': 3035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:39.408554', 'step': 3035, 'epoch': 1} {'type': 'loss', 'content': 0.1814689338207245, 'timestamp': '2025-09-10 02:36:39.415238', 'step': 3036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:39.469953', 'step': 3036, 'epoch': 1} {'type': 'loss', 'content': 0.15469063818454742, 'timestamp': '2025-09-10 02:36:39.472137', 'step': 3037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:39.529318', 'step': 3037, 'epoch': 1} {'type': 'loss', 'content': 0.22952207922935486, 'timestamp': '2025-09-10 02:36:39.531559', 'step': 3038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:39.587627', 'step': 3038, 'epoch': 1} {'type': 'loss', 'content': 0.14167091250419617, 'timestamp': '2025-09-10 02:36:39.591015', 'step': 3039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:36:39.646153', 'step': 3039, 'epoch': 1} {'type': 'loss', 'content': 0.2178356796503067, 'timestamp': '2025-09-10 02:36:39.652629', 'step': 3040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:39.707684', 'step': 3040, 'epoch': 1} {'type': 'loss', 'content': 0.10697855800390244, 'timestamp': '2025-09-10 02:36:39.709774', 'step': 3041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:39.764653', 'step': 3041, 'epoch': 1} {'type': 'loss', 'content': 0.1626180112361908, 'timestamp': '2025-09-10 02:36:39.766927', 'step': 3042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:39.822574', 'step': 3042, 'epoch': 1} {'type': 'loss', 'content': 0.2267085611820221, 'timestamp': '2025-09-10 02:36:39.824744', 'step': 3043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:39.880630', 'step': 3043, 'epoch': 1} {'type': 'loss', 'content': 0.22127990424633026, 'timestamp': '2025-09-10 02:36:39.886834', 'step': 3044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:39.943016', 'step': 3044, 'epoch': 1} {'type': 'loss', 'content': 0.27625253796577454, 'timestamp': '2025-09-10 02:36:39.945554', 'step': 3045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:40.001761', 'step': 3045, 'epoch': 1} {'type': 'loss', 'content': 0.12213779240846634, 'timestamp': '2025-09-10 02:36:40.004319', 'step': 3046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:40.062426', 'step': 3046, 'epoch': 1} {'type': 'loss', 'content': 0.2438262552022934, 'timestamp': '2025-09-10 02:36:40.064722', 'step': 3047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:36:40.122131', 'step': 3047, 'epoch': 1} {'type': 'loss', 'content': 0.1275758445262909, 'timestamp': '2025-09-10 02:36:40.129167', 'step': 3048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:40.185707', 'step': 3048, 'epoch': 1} {'type': 'loss', 'content': 0.15082426369190216, 'timestamp': '2025-09-10 02:36:40.188062', 'step': 3049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:40.245711', 'step': 3049, 'epoch': 1} {'type': 'loss', 'content': 0.1131877452135086, 'timestamp': '2025-09-10 02:36:40.248301', 'step': 3050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:40.304490', 'step': 3050, 'epoch': 1} {'type': 'loss', 'content': 0.18562887609004974, 'timestamp': '2025-09-10 02:36:40.306807', 'step': 3051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:40.362931', 'step': 3051, 'epoch': 1} {'type': 'loss', 'content': 0.1696319729089737, 'timestamp': '2025-09-10 02:36:40.370003', 'step': 3052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:40.425294', 'step': 3052, 'epoch': 1} {'type': 'loss', 'content': 0.21535339951515198, 'timestamp': '2025-09-10 02:36:40.427647', 'step': 3053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:40.484196', 'step': 3053, 'epoch': 1} {'type': 'loss', 'content': 0.20761485397815704, 'timestamp': '2025-09-10 02:36:40.486476', 'step': 3054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:40.545108', 'step': 3054, 'epoch': 1} {'type': 'loss', 'content': 0.1207016184926033, 'timestamp': '2025-09-10 02:36:40.547552', 'step': 3055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:40.603859', 'step': 3055, 'epoch': 1} {'type': 'loss', 'content': 0.20506101846694946, 'timestamp': '2025-09-10 02:36:40.610592', 'step': 3056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:40.666222', 'step': 3056, 'epoch': 1} {'type': 'loss', 'content': 0.10690362751483917, 'timestamp': '2025-09-10 02:36:40.668839', 'step': 3057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:40.727135', 'step': 3057, 'epoch': 1} {'type': 'loss', 'content': 0.2846146821975708, 'timestamp': '2025-09-10 02:36:40.729524', 'step': 3058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:40.786294', 'step': 3058, 'epoch': 1} {'type': 'loss', 'content': 0.16132356226444244, 'timestamp': '2025-09-10 02:36:40.788810', 'step': 3059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:40.846994', 'step': 3059, 'epoch': 1} {'type': 'loss', 'content': 0.28397679328918457, 'timestamp': '2025-09-10 02:36:40.854006', 'step': 3060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:40.910246', 'step': 3060, 'epoch': 1} {'type': 'loss', 'content': 0.09515782445669174, 'timestamp': '2025-09-10 02:36:40.912670', 'step': 3061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:40.969218', 'step': 3061, 'epoch': 1} {'type': 'loss', 'content': 0.13069377839565277, 'timestamp': '2025-09-10 02:36:40.971564', 'step': 3062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:41.029242', 'step': 3062, 'epoch': 1} {'type': 'loss', 'content': 0.26189425587654114, 'timestamp': '2025-09-10 02:36:41.031492', 'step': 3063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:41.088528', 'step': 3063, 'epoch': 1} {'type': 'loss', 'content': 0.13767313957214355, 'timestamp': '2025-09-10 02:36:41.094923', 'step': 3064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:41.151434', 'step': 3064, 'epoch': 1} {'type': 'loss', 'content': 0.1952415108680725, 'timestamp': '2025-09-10 02:36:41.153730', 'step': 3065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:41.210132', 'step': 3065, 'epoch': 1} {'type': 'loss', 'content': 0.09449435770511627, 'timestamp': '2025-09-10 02:36:41.212743', 'step': 3066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:41.270691', 'step': 3066, 'epoch': 1} {'type': 'loss', 'content': 0.1638423651456833, 'timestamp': '2025-09-10 02:36:41.272942', 'step': 3067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:41.329682', 'step': 3067, 'epoch': 1} {'type': 'loss', 'content': 0.13530346751213074, 'timestamp': '2025-09-10 02:36:41.336335', 'step': 3068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:41.391715', 'step': 3068, 'epoch': 1} {'type': 'loss', 'content': 0.2602287530899048, 'timestamp': '2025-09-10 02:36:41.394108', 'step': 3069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:41.452011', 'step': 3069, 'epoch': 1} {'type': 'loss', 'content': 0.2399754375219345, 'timestamp': '2025-09-10 02:36:41.454357', 'step': 3070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:41.511659', 'step': 3070, 'epoch': 1} {'type': 'loss', 'content': 0.21975237131118774, 'timestamp': '2025-09-10 02:36:41.514231', 'step': 3071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:41.572203', 'step': 3071, 'epoch': 1} {'type': 'loss', 'content': 0.24881817400455475, 'timestamp': '2025-09-10 02:36:41.578967', 'step': 3072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:41.635635', 'step': 3072, 'epoch': 1} {'type': 'loss', 'content': 0.2425750344991684, 'timestamp': '2025-09-10 02:36:41.637954', 'step': 3073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:41.694729', 'step': 3073, 'epoch': 1} {'type': 'loss', 'content': 0.1303475797176361, 'timestamp': '2025-09-10 02:36:41.697379', 'step': 3074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:41.759425', 'step': 3074, 'epoch': 1} {'type': 'loss', 'content': 0.15621060132980347, 'timestamp': '2025-09-10 02:36:41.762014', 'step': 3075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:41.820873', 'step': 3075, 'epoch': 1} {'type': 'loss', 'content': 0.09915891289710999, 'timestamp': '2025-09-10 02:36:41.827920', 'step': 3076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:41.883967', 'step': 3076, 'epoch': 1} {'type': 'loss', 'content': 0.214742973446846, 'timestamp': '2025-09-10 02:36:41.886389', 'step': 3077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:41.943005', 'step': 3077, 'epoch': 1} {'type': 'loss', 'content': 0.16422484815120697, 'timestamp': '2025-09-10 02:36:41.945202', 'step': 3078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:42.002237', 'step': 3078, 'epoch': 1} {'type': 'loss', 'content': 0.13760332763195038, 'timestamp': '2025-09-10 02:36:42.004727', 'step': 3079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:42.062824', 'step': 3079, 'epoch': 1} {'type': 'loss', 'content': 0.2478722184896469, 'timestamp': '2025-09-10 02:36:42.069844', 'step': 3080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:42.126235', 'step': 3080, 'epoch': 1} {'type': 'loss', 'content': 0.20429611206054688, 'timestamp': '2025-09-10 02:36:42.128658', 'step': 3081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:42.185104', 'step': 3081, 'epoch': 1} {'type': 'loss', 'content': 0.24097371101379395, 'timestamp': '2025-09-10 02:36:42.187316', 'step': 3082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:42.244027', 'step': 3082, 'epoch': 1} {'type': 'loss', 'content': 0.15240709483623505, 'timestamp': '2025-09-10 02:36:42.246485', 'step': 3083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:42.302675', 'step': 3083, 'epoch': 1} {'type': 'loss', 'content': 0.1786963790655136, 'timestamp': '2025-09-10 02:36:42.309112', 'step': 3084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:42.365014', 'step': 3084, 'epoch': 1} {'type': 'loss', 'content': 0.07371129840612411, 'timestamp': '2025-09-10 02:36:42.367426', 'step': 3085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:42.422516', 'step': 3085, 'epoch': 1} {'type': 'loss', 'content': 0.1894311159849167, 'timestamp': '2025-09-10 02:36:42.424879', 'step': 3086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:42.480861', 'step': 3086, 'epoch': 1} {'type': 'loss', 'content': 0.16868478059768677, 'timestamp': '2025-09-10 02:36:42.483081', 'step': 3087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:42.538970', 'step': 3087, 'epoch': 1} {'type': 'loss', 'content': 0.2330087274312973, 'timestamp': '2025-09-10 02:36:42.545692', 'step': 3088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:42.601913', 'step': 3088, 'epoch': 1} {'type': 'loss', 'content': 0.26266834139823914, 'timestamp': '2025-09-10 02:36:42.604418', 'step': 3089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:42.661462', 'step': 3089, 'epoch': 1} {'type': 'loss', 'content': 0.10998016595840454, 'timestamp': '2025-09-10 02:36:42.663535', 'step': 3090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:42.719137', 'step': 3090, 'epoch': 1} {'type': 'loss', 'content': 0.20684202015399933, 'timestamp': '2025-09-10 02:36:42.721294', 'step': 3091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:42.776308', 'step': 3091, 'epoch': 1} {'type': 'loss', 'content': 0.1633743941783905, 'timestamp': '2025-09-10 02:36:42.782510', 'step': 3092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:42.836137', 'step': 3092, 'epoch': 1} {'type': 'loss', 'content': 0.21125304698944092, 'timestamp': '2025-09-10 02:36:42.838595', 'step': 3093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:42.893580', 'step': 3093, 'epoch': 1} {'type': 'loss', 'content': 0.11933848261833191, 'timestamp': '2025-09-10 02:36:42.895931', 'step': 3094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:42.951488', 'step': 3094, 'epoch': 1} {'type': 'loss', 'content': 0.13836713135242462, 'timestamp': '2025-09-10 02:36:42.953744', 'step': 3095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:43.007834', 'step': 3095, 'epoch': 1} {'type': 'loss', 'content': 0.18790537118911743, 'timestamp': '2025-09-10 02:36:43.014112', 'step': 3096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:43.067921', 'step': 3096, 'epoch': 1} {'type': 'loss', 'content': 0.09702955186367035, 'timestamp': '2025-09-10 02:36:43.070094', 'step': 3097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:43.125833', 'step': 3097, 'epoch': 1} {'type': 'loss', 'content': 0.24680204689502716, 'timestamp': '2025-09-10 02:36:43.127983', 'step': 3098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:43.184943', 'step': 3098, 'epoch': 1} {'type': 'loss', 'content': 0.1452530026435852, 'timestamp': '2025-09-10 02:36:43.187138', 'step': 3099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:43.242736', 'step': 3099, 'epoch': 1} {'type': 'loss', 'content': 0.19629457592964172, 'timestamp': '2025-09-10 02:36:43.249651', 'step': 3100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:43.305264', 'step': 3100, 'epoch': 1} {'type': 'loss', 'content': 0.218811497092247, 'timestamp': '2025-09-10 02:36:43.307424', 'step': 3101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:43.362862', 'step': 3101, 'epoch': 1} {'type': 'loss', 'content': 0.1029987558722496, 'timestamp': '2025-09-10 02:36:43.365344', 'step': 3102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:43.420657', 'step': 3102, 'epoch': 1} {'type': 'loss', 'content': 0.18986201286315918, 'timestamp': '2025-09-10 02:36:43.423091', 'step': 3103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:43.479774', 'step': 3103, 'epoch': 1} {'type': 'loss', 'content': 0.10821060091257095, 'timestamp': '2025-09-10 02:36:43.487767', 'step': 3104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:43.542439', 'step': 3104, 'epoch': 1} {'type': 'loss', 'content': 0.24084968864917755, 'timestamp': '2025-09-10 02:36:43.544563', 'step': 3105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:43.599781', 'step': 3105, 'epoch': 1} {'type': 'loss', 'content': 0.1763685643672943, 'timestamp': '2025-09-10 02:36:43.602297', 'step': 3106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:43.657872', 'step': 3106, 'epoch': 1} {'type': 'loss', 'content': 0.10663679987192154, 'timestamp': '2025-09-10 02:36:43.660336', 'step': 3107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:43.717556', 'step': 3107, 'epoch': 1} {'type': 'loss', 'content': 0.1173812672495842, 'timestamp': '2025-09-10 02:36:43.724276', 'step': 3108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:43.778957', 'step': 3108, 'epoch': 1} {'type': 'loss', 'content': 0.1430976241827011, 'timestamp': '2025-09-10 02:36:43.781293', 'step': 3109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:43.837417', 'step': 3109, 'epoch': 1} {'type': 'loss', 'content': 0.183147132396698, 'timestamp': '2025-09-10 02:36:43.839720', 'step': 3110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:43.895728', 'step': 3110, 'epoch': 1} {'type': 'loss', 'content': 0.11103165149688721, 'timestamp': '2025-09-10 02:36:43.898088', 'step': 3111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:43.954060', 'step': 3111, 'epoch': 1} {'type': 'loss', 'content': 0.20648817718029022, 'timestamp': '2025-09-10 02:36:43.960730', 'step': 3112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:44.015492', 'step': 3112, 'epoch': 1} {'type': 'loss', 'content': 0.1402042955160141, 'timestamp': '2025-09-10 02:36:44.017811', 'step': 3113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:44.073661', 'step': 3113, 'epoch': 1} {'type': 'loss', 'content': 0.1865811049938202, 'timestamp': '2025-09-10 02:36:44.075966', 'step': 3114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:44.132326', 'step': 3114, 'epoch': 1} {'type': 'loss', 'content': 0.15659381449222565, 'timestamp': '2025-09-10 02:36:44.134783', 'step': 3115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:44.192438', 'step': 3115, 'epoch': 1} {'type': 'loss', 'content': 0.293999046087265, 'timestamp': '2025-09-10 02:36:44.199412', 'step': 3116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:44.256392', 'step': 3116, 'epoch': 1} {'type': 'loss', 'content': 0.1739179790019989, 'timestamp': '2025-09-10 02:36:44.258863', 'step': 3117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:44.315866', 'step': 3117, 'epoch': 1} {'type': 'loss', 'content': 0.15701766312122345, 'timestamp': '2025-09-10 02:36:44.318335', 'step': 3118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:44.374913', 'step': 3118, 'epoch': 1} {'type': 'loss', 'content': 0.1472284197807312, 'timestamp': '2025-09-10 02:36:44.377503', 'step': 3119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:44.433250', 'step': 3119, 'epoch': 1} {'type': 'loss', 'content': 0.15741568803787231, 'timestamp': '2025-09-10 02:36:44.439793', 'step': 3120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:44.495814', 'step': 3120, 'epoch': 1} {'type': 'loss', 'content': 0.13939815759658813, 'timestamp': '2025-09-10 02:36:44.498152', 'step': 3121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:44.555286', 'step': 3121, 'epoch': 1} {'type': 'loss', 'content': 0.20122916996479034, 'timestamp': '2025-09-10 02:36:44.557493', 'step': 3122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:44.614297', 'step': 3122, 'epoch': 1} {'type': 'loss', 'content': 0.14561517536640167, 'timestamp': '2025-09-10 02:36:44.616533', 'step': 3123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:44.673827', 'step': 3123, 'epoch': 1} {'type': 'loss', 'content': 0.15077383816242218, 'timestamp': '2025-09-10 02:36:44.680545', 'step': 3124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:44.740881', 'step': 3124, 'epoch': 1} {'type': 'loss', 'content': 0.15854007005691528, 'timestamp': '2025-09-10 02:36:44.743297', 'step': 3125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:44.801900', 'step': 3125, 'epoch': 1} {'type': 'loss', 'content': 0.1797088086605072, 'timestamp': '2025-09-10 02:36:44.804439', 'step': 3126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:44.863022', 'step': 3126, 'epoch': 1} {'type': 'loss', 'content': 0.18211516737937927, 'timestamp': '2025-09-10 02:36:44.865321', 'step': 3127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:44.921835', 'step': 3127, 'epoch': 1} {'type': 'loss', 'content': 0.14451561868190765, 'timestamp': '2025-09-10 02:36:44.928621', 'step': 3128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:44.984497', 'step': 3128, 'epoch': 1} {'type': 'loss', 'content': 0.1438654214143753, 'timestamp': '2025-09-10 02:36:44.987018', 'step': 3129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:45.043093', 'step': 3129, 'epoch': 1} {'type': 'loss', 'content': 0.29063311219215393, 'timestamp': '2025-09-10 02:36:45.045701', 'step': 3130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:45.103890', 'step': 3130, 'epoch': 1} {'type': 'loss', 'content': 0.13377611339092255, 'timestamp': '2025-09-10 02:36:45.106632', 'step': 3131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:45.163255', 'step': 3131, 'epoch': 1} {'type': 'loss', 'content': 0.15067347884178162, 'timestamp': '2025-09-10 02:36:45.170208', 'step': 3132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:45.225461', 'step': 3132, 'epoch': 1} {'type': 'loss', 'content': 0.20936398208141327, 'timestamp': '2025-09-10 02:36:45.228096', 'step': 3133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:45.284252', 'step': 3133, 'epoch': 1} {'type': 'loss', 'content': 0.14039358496665955, 'timestamp': '2025-09-10 02:36:45.286646', 'step': 3134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:45.342850', 'step': 3134, 'epoch': 1} {'type': 'loss', 'content': 0.1905057430267334, 'timestamp': '2025-09-10 02:36:45.345264', 'step': 3135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:45.400908', 'step': 3135, 'epoch': 1} {'type': 'loss', 'content': 0.1789485067129135, 'timestamp': '2025-09-10 02:36:45.407810', 'step': 3136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:45.464219', 'step': 3136, 'epoch': 1} {'type': 'loss', 'content': 0.1525665670633316, 'timestamp': '2025-09-10 02:36:45.466393', 'step': 3137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:45.522227', 'step': 3137, 'epoch': 1} {'type': 'loss', 'content': 0.11320290714502335, 'timestamp': '2025-09-10 02:36:45.528380', 'step': 3138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:45.584631', 'step': 3138, 'epoch': 1} {'type': 'loss', 'content': 0.12607938051223755, 'timestamp': '2025-09-10 02:36:45.587066', 'step': 3139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:45.643482', 'step': 3139, 'epoch': 1} {'type': 'loss', 'content': 0.21425579488277435, 'timestamp': '2025-09-10 02:36:45.650119', 'step': 3140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:45.707477', 'step': 3140, 'epoch': 1} {'type': 'loss', 'content': 0.22405387461185455, 'timestamp': '2025-09-10 02:36:45.709733', 'step': 3141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:45.765748', 'step': 3141, 'epoch': 1} {'type': 'loss', 'content': 0.14332161843776703, 'timestamp': '2025-09-10 02:36:45.768090', 'step': 3142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:45.823321', 'step': 3142, 'epoch': 1} {'type': 'loss', 'content': 0.3478378355503082, 'timestamp': '2025-09-10 02:36:45.827439', 'step': 3143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:45.885522', 'step': 3143, 'epoch': 1} {'type': 'loss', 'content': 0.21078108251094818, 'timestamp': '2025-09-10 02:36:45.892368', 'step': 3144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:45.945451', 'step': 3144, 'epoch': 1} {'type': 'loss', 'content': 0.16775885224342346, 'timestamp': '2025-09-10 02:36:45.947837', 'step': 3145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:46.002385', 'step': 3145, 'epoch': 1} {'type': 'loss', 'content': 0.2226664125919342, 'timestamp': '2025-09-10 02:36:46.004599', 'step': 3146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:46.058421', 'step': 3146, 'epoch': 1} {'type': 'loss', 'content': 0.1696440428495407, 'timestamp': '2025-09-10 02:36:46.060878', 'step': 3147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:46.115677', 'step': 3147, 'epoch': 1} {'type': 'loss', 'content': 0.2670271098613739, 'timestamp': '2025-09-10 02:36:46.122066', 'step': 3148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:46.181158', 'step': 3148, 'epoch': 1} {'type': 'loss', 'content': 0.19368021190166473, 'timestamp': '2025-09-10 02:36:46.183497', 'step': 3149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:46.238449', 'step': 3149, 'epoch': 1} {'type': 'loss', 'content': 0.19258055090904236, 'timestamp': '2025-09-10 02:36:46.241576', 'step': 3150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:46.298687', 'step': 3150, 'epoch': 1} {'type': 'loss', 'content': 0.31429219245910645, 'timestamp': '2025-09-10 02:36:46.301007', 'step': 3151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:46.355241', 'step': 3151, 'epoch': 1} {'type': 'loss', 'content': 0.14431583881378174, 'timestamp': '2025-09-10 02:36:46.361774', 'step': 3152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:46.418701', 'step': 3152, 'epoch': 1} {'type': 'loss', 'content': 0.16263461112976074, 'timestamp': '2025-09-10 02:36:46.421098', 'step': 3153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:46.483754', 'step': 3153, 'epoch': 1} {'type': 'loss', 'content': 0.12919148802757263, 'timestamp': '2025-09-10 02:36:46.486099', 'step': 3154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:46.543632', 'step': 3154, 'epoch': 1} {'type': 'loss', 'content': 0.18342645466327667, 'timestamp': '2025-09-10 02:36:46.546784', 'step': 3155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:46.604218', 'step': 3155, 'epoch': 1} {'type': 'loss', 'content': 0.2109769582748413, 'timestamp': '2025-09-10 02:36:46.611294', 'step': 3156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:46.667542', 'step': 3156, 'epoch': 1} {'type': 'loss', 'content': 0.21709972620010376, 'timestamp': '2025-09-10 02:36:46.669853', 'step': 3157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:46.725465', 'step': 3157, 'epoch': 1} {'type': 'loss', 'content': 0.2439221888780594, 'timestamp': '2025-09-10 02:36:46.729436', 'step': 3158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:46.787100', 'step': 3158, 'epoch': 1} {'type': 'loss', 'content': 0.13257908821105957, 'timestamp': '2025-09-10 02:36:46.789593', 'step': 3159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:46.848201', 'step': 3159, 'epoch': 1} {'type': 'loss', 'content': 0.178989440202713, 'timestamp': '2025-09-10 02:36:46.855062', 'step': 3160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:46.912142', 'step': 3160, 'epoch': 1} {'type': 'loss', 'content': 0.18262054026126862, 'timestamp': '2025-09-10 02:36:46.914880', 'step': 3161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:46.973291', 'step': 3161, 'epoch': 1} {'type': 'loss', 'content': 0.14462201297283173, 'timestamp': '2025-09-10 02:36:46.975744', 'step': 3162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:47.032076', 'step': 3162, 'epoch': 1} {'type': 'loss', 'content': 0.2508303225040436, 'timestamp': '2025-09-10 02:36:47.034608', 'step': 3163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:47.090323', 'step': 3163, 'epoch': 1} {'type': 'loss', 'content': 0.24913251399993896, 'timestamp': '2025-09-10 02:36:47.096712', 'step': 3164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:47.151223', 'step': 3164, 'epoch': 1} {'type': 'loss', 'content': 0.2940349280834198, 'timestamp': '2025-09-10 02:36:47.153575', 'step': 3165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:47.208223', 'step': 3165, 'epoch': 1} {'type': 'loss', 'content': 0.20427165925502777, 'timestamp': '2025-09-10 02:36:47.210306', 'step': 3166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:47.265731', 'step': 3166, 'epoch': 1} {'type': 'loss', 'content': 0.20943592488765717, 'timestamp': '2025-09-10 02:36:47.267940', 'step': 3167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:47.322767', 'step': 3167, 'epoch': 1} {'type': 'loss', 'content': 0.2126007229089737, 'timestamp': '2025-09-10 02:36:47.329088', 'step': 3168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:47.383278', 'step': 3168, 'epoch': 1} {'type': 'loss', 'content': 0.09537112712860107, 'timestamp': '2025-09-10 02:36:47.385801', 'step': 3169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:47.442150', 'step': 3169, 'epoch': 1} {'type': 'loss', 'content': 0.12254435569047928, 'timestamp': '2025-09-10 02:36:47.444638', 'step': 3170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:47.500868', 'step': 3170, 'epoch': 1} {'type': 'loss', 'content': 0.15823394060134888, 'timestamp': '2025-09-10 02:36:47.503051', 'step': 3171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:47.558762', 'step': 3171, 'epoch': 1} {'type': 'loss', 'content': 0.22679029405117035, 'timestamp': '2025-09-10 02:36:47.565393', 'step': 3172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:47.621200', 'step': 3172, 'epoch': 1} {'type': 'loss', 'content': 0.18034714460372925, 'timestamp': '2025-09-10 02:36:47.623475', 'step': 3173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:47.680047', 'step': 3173, 'epoch': 1} {'type': 'loss', 'content': 0.1609453409910202, 'timestamp': '2025-09-10 02:36:47.682317', 'step': 3174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:47.742484', 'step': 3174, 'epoch': 1} {'type': 'loss', 'content': 0.1776101440191269, 'timestamp': '2025-09-10 02:36:47.744840', 'step': 3175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:47.801192', 'step': 3175, 'epoch': 1} {'type': 'loss', 'content': 0.23939566314220428, 'timestamp': '2025-09-10 02:36:47.807764', 'step': 3176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:47.863582', 'step': 3176, 'epoch': 1} {'type': 'loss', 'content': 0.18552565574645996, 'timestamp': '2025-09-10 02:36:47.865809', 'step': 3177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:47.922167', 'step': 3177, 'epoch': 1} {'type': 'loss', 'content': 0.21967418491840363, 'timestamp': '2025-09-10 02:36:47.924655', 'step': 3178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:47.980283', 'step': 3178, 'epoch': 1} {'type': 'loss', 'content': 0.17825306951999664, 'timestamp': '2025-09-10 02:36:47.982601', 'step': 3179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:48.039656', 'step': 3179, 'epoch': 1} {'type': 'loss', 'content': 0.15878038108348846, 'timestamp': '2025-09-10 02:36:48.046425', 'step': 3180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:48.101402', 'step': 3180, 'epoch': 1} {'type': 'loss', 'content': 0.16590698063373566, 'timestamp': '2025-09-10 02:36:48.103579', 'step': 3181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:48.158308', 'step': 3181, 'epoch': 1} {'type': 'loss', 'content': 0.14794114232063293, 'timestamp': '2025-09-10 02:36:48.160698', 'step': 3182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:48.215673', 'step': 3182, 'epoch': 1} {'type': 'loss', 'content': 0.18309596180915833, 'timestamp': '2025-09-10 02:36:48.217976', 'step': 3183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:48.272492', 'step': 3183, 'epoch': 1} {'type': 'loss', 'content': 0.18301454186439514, 'timestamp': '2025-09-10 02:36:48.279148', 'step': 3184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:48.333573', 'step': 3184, 'epoch': 1} {'type': 'loss', 'content': 0.2671540677547455, 'timestamp': '2025-09-10 02:36:48.335934', 'step': 3185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:48.390833', 'step': 3185, 'epoch': 1} {'type': 'loss', 'content': 0.21309739351272583, 'timestamp': '2025-09-10 02:36:48.393352', 'step': 3186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:48.448414', 'step': 3186, 'epoch': 1} {'type': 'loss', 'content': 0.17060421407222748, 'timestamp': '2025-09-10 02:36:48.450852', 'step': 3187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:48.505762', 'step': 3187, 'epoch': 1} {'type': 'loss', 'content': 0.23444391787052155, 'timestamp': '2025-09-10 02:36:48.512232', 'step': 3188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:48.567530', 'step': 3188, 'epoch': 1} {'type': 'loss', 'content': 0.17203351855278015, 'timestamp': '2025-09-10 02:36:48.569714', 'step': 3189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:48.624312', 'step': 3189, 'epoch': 1} {'type': 'loss', 'content': 0.1328628659248352, 'timestamp': '2025-09-10 02:36:48.626522', 'step': 3190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:48.681803', 'step': 3190, 'epoch': 1} {'type': 'loss', 'content': 0.1364295482635498, 'timestamp': '2025-09-10 02:36:48.683910', 'step': 3191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:48.738592', 'step': 3191, 'epoch': 1} {'type': 'loss', 'content': 0.14386770129203796, 'timestamp': '2025-09-10 02:36:48.744860', 'step': 3192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:48.798686', 'step': 3192, 'epoch': 1} {'type': 'loss', 'content': 0.11837732046842575, 'timestamp': '2025-09-10 02:36:48.800756', 'step': 3193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:36:48.854648', 'step': 3193, 'epoch': 1} {'type': 'loss', 'content': 0.20892252027988434, 'timestamp': '2025-09-10 02:36:48.856756', 'step': 3194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:48.911638', 'step': 3194, 'epoch': 1} {'type': 'loss', 'content': 0.14198507368564606, 'timestamp': '2025-09-10 02:36:48.913918', 'step': 3195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:48.968375', 'step': 3195, 'epoch': 1} {'type': 'loss', 'content': 0.13824310898780823, 'timestamp': '2025-09-10 02:36:48.974738', 'step': 3196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:49.029232', 'step': 3196, 'epoch': 1} {'type': 'loss', 'content': 0.09383617341518402, 'timestamp': '2025-09-10 02:36:49.031401', 'step': 3197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:36:49.088337', 'step': 3197, 'epoch': 1} {'type': 'loss', 'content': 0.09722209721803665, 'timestamp': '2025-09-10 02:36:49.090442', 'step': 3198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:49.147711', 'step': 3198, 'epoch': 1} {'type': 'loss', 'content': 0.22107155621051788, 'timestamp': '2025-09-10 02:36:49.149839', 'step': 3199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:49.206789', 'step': 3199, 'epoch': 1} {'type': 'loss', 'content': 0.13488668203353882, 'timestamp': '2025-09-10 02:36:49.213323', 'step': 3200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:49.268395', 'step': 3200, 'epoch': 1} {'type': 'loss', 'content': 0.20669463276863098, 'timestamp': '2025-09-10 02:36:49.270810', 'step': 3201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:49.325736', 'step': 3201, 'epoch': 1} {'type': 'loss', 'content': 0.2662254273891449, 'timestamp': '2025-09-10 02:36:49.328022', 'step': 3202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:49.383084', 'step': 3202, 'epoch': 1} {'type': 'loss', 'content': 0.1808696836233139, 'timestamp': '2025-09-10 02:36:49.385411', 'step': 3203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:49.440101', 'step': 3203, 'epoch': 1} {'type': 'loss', 'content': 0.1934121549129486, 'timestamp': '2025-09-10 02:36:49.446659', 'step': 3204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:49.500121', 'step': 3204, 'epoch': 1} {'type': 'loss', 'content': 0.22653351724147797, 'timestamp': '2025-09-10 02:36:49.502314', 'step': 3205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:49.557707', 'step': 3205, 'epoch': 1} {'type': 'loss', 'content': 0.16443462669849396, 'timestamp': '2025-09-10 02:36:49.560078', 'step': 3206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:49.615322', 'step': 3206, 'epoch': 1} {'type': 'loss', 'content': 0.13181334733963013, 'timestamp': '2025-09-10 02:36:49.617822', 'step': 3207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:49.672056', 'step': 3207, 'epoch': 1} {'type': 'loss', 'content': 0.15095852315425873, 'timestamp': '2025-09-10 02:36:49.678550', 'step': 3208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:49.732235', 'step': 3208, 'epoch': 1} {'type': 'loss', 'content': 0.09925267845392227, 'timestamp': '2025-09-10 02:36:49.734720', 'step': 3209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:49.790206', 'step': 3209, 'epoch': 1} {'type': 'loss', 'content': 0.2408217191696167, 'timestamp': '2025-09-10 02:36:49.792534', 'step': 3210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:49.847119', 'step': 3210, 'epoch': 1} {'type': 'loss', 'content': 0.21042640507221222, 'timestamp': '2025-09-10 02:36:49.849590', 'step': 3211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:49.903241', 'step': 3211, 'epoch': 1} {'type': 'loss', 'content': 0.1567915976047516, 'timestamp': '2025-09-10 02:36:49.909128', 'step': 3212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:49.963096', 'step': 3212, 'epoch': 1} {'type': 'loss', 'content': 0.16156886518001556, 'timestamp': '2025-09-10 02:36:49.965367', 'step': 3213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:50.019790', 'step': 3213, 'epoch': 1} {'type': 'loss', 'content': 0.1524890661239624, 'timestamp': '2025-09-10 02:36:50.022125', 'step': 3214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:50.078156', 'step': 3214, 'epoch': 1} {'type': 'loss', 'content': 0.20291800796985626, 'timestamp': '2025-09-10 02:36:50.080493', 'step': 3215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:50.136209', 'step': 3215, 'epoch': 1} {'type': 'loss', 'content': 0.14089730381965637, 'timestamp': '2025-09-10 02:36:50.142937', 'step': 3216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:50.197978', 'step': 3216, 'epoch': 1} {'type': 'loss', 'content': 0.2411174476146698, 'timestamp': '2025-09-10 02:36:50.200422', 'step': 3217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:50.254242', 'step': 3217, 'epoch': 1} {'type': 'loss', 'content': 0.15743201971054077, 'timestamp': '2025-09-10 02:36:50.256771', 'step': 3218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:50.311187', 'step': 3218, 'epoch': 1} {'type': 'loss', 'content': 0.17506645619869232, 'timestamp': '2025-09-10 02:36:50.313641', 'step': 3219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:50.368113', 'step': 3219, 'epoch': 1} {'type': 'loss', 'content': 0.1350892037153244, 'timestamp': '2025-09-10 02:36:50.374763', 'step': 3220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:50.428160', 'step': 3220, 'epoch': 1} {'type': 'loss', 'content': 0.19773222506046295, 'timestamp': '2025-09-10 02:36:50.430242', 'step': 3221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:50.484244', 'step': 3221, 'epoch': 1} {'type': 'loss', 'content': 0.17100806534290314, 'timestamp': '2025-09-10 02:36:50.486473', 'step': 3222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:50.539656', 'step': 3222, 'epoch': 1} {'type': 'loss', 'content': 0.2665916383266449, 'timestamp': '2025-09-10 02:36:50.541967', 'step': 3223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:50.596291', 'step': 3223, 'epoch': 1} {'type': 'loss', 'content': 0.13951155543327332, 'timestamp': '2025-09-10 02:36:50.602607', 'step': 3224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:50.656065', 'step': 3224, 'epoch': 1} {'type': 'loss', 'content': 0.17409886419773102, 'timestamp': '2025-09-10 02:36:50.658335', 'step': 3225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:50.713236', 'step': 3225, 'epoch': 1} {'type': 'loss', 'content': 0.1511145830154419, 'timestamp': '2025-09-10 02:36:50.715433', 'step': 3226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:50.772298', 'step': 3226, 'epoch': 1} {'type': 'loss', 'content': 0.1802312731742859, 'timestamp': '2025-09-10 02:36:50.774693', 'step': 3227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:50.829048', 'step': 3227, 'epoch': 1} {'type': 'loss', 'content': 0.2890918552875519, 'timestamp': '2025-09-10 02:36:50.835507', 'step': 3228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:50.888705', 'step': 3228, 'epoch': 1} {'type': 'loss', 'content': 0.10762922465801239, 'timestamp': '2025-09-10 02:36:50.891228', 'step': 3229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:50.946388', 'step': 3229, 'epoch': 1} {'type': 'loss', 'content': 0.17840522527694702, 'timestamp': '2025-09-10 02:36:50.948608', 'step': 3230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:51.003513', 'step': 3230, 'epoch': 1} {'type': 'loss', 'content': 0.19459035992622375, 'timestamp': '2025-09-10 02:36:51.005815', 'step': 3231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:51.059644', 'step': 3231, 'epoch': 1} {'type': 'loss', 'content': 0.21664950251579285, 'timestamp': '2025-09-10 02:36:51.065920', 'step': 3232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:51.120050', 'step': 3232, 'epoch': 1} {'type': 'loss', 'content': 0.10931815952062607, 'timestamp': '2025-09-10 02:36:51.122318', 'step': 3233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:51.178404', 'step': 3233, 'epoch': 1} {'type': 'loss', 'content': 0.12039615213871002, 'timestamp': '2025-09-10 02:36:51.180729', 'step': 3234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:36:51.235589', 'step': 3234, 'epoch': 1} {'type': 'loss', 'content': 0.21255357563495636, 'timestamp': '2025-09-10 02:36:51.237770', 'step': 3235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:51.292727', 'step': 3235, 'epoch': 1} {'type': 'loss', 'content': 0.17433680593967438, 'timestamp': '2025-09-10 02:36:51.299066', 'step': 3236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:51.354091', 'step': 3236, 'epoch': 1} {'type': 'loss', 'content': 0.24551624059677124, 'timestamp': '2025-09-10 02:36:51.356359', 'step': 3237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:51.411315', 'step': 3237, 'epoch': 1} {'type': 'loss', 'content': 0.1845678687095642, 'timestamp': '2025-09-10 02:36:51.413521', 'step': 3238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:51.467466', 'step': 3238, 'epoch': 1} {'type': 'loss', 'content': 0.1402643322944641, 'timestamp': '2025-09-10 02:36:51.469640', 'step': 3239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:51.524278', 'step': 3239, 'epoch': 1} {'type': 'loss', 'content': 0.2002113163471222, 'timestamp': '2025-09-10 02:36:51.530754', 'step': 3240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:51.585793', 'step': 3240, 'epoch': 1} {'type': 'loss', 'content': 0.1799054592847824, 'timestamp': '2025-09-10 02:36:51.588141', 'step': 3241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:51.642504', 'step': 3241, 'epoch': 1} {'type': 'loss', 'content': 0.22434526681900024, 'timestamp': '2025-09-10 02:36:51.644686', 'step': 3242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:51.698600', 'step': 3242, 'epoch': 1} {'type': 'loss', 'content': 0.13781332969665527, 'timestamp': '2025-09-10 02:36:51.700553', 'step': 3243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:51.755469', 'step': 3243, 'epoch': 1} {'type': 'loss', 'content': 0.25467145442962646, 'timestamp': '2025-09-10 02:36:51.761886', 'step': 3244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:51.815786', 'step': 3244, 'epoch': 1} {'type': 'loss', 'content': 0.09692112356424332, 'timestamp': '2025-09-10 02:36:51.817845', 'step': 3245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:51.871759', 'step': 3245, 'epoch': 1} {'type': 'loss', 'content': 0.22256623208522797, 'timestamp': '2025-09-10 02:36:51.874104', 'step': 3246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:51.928439', 'step': 3246, 'epoch': 1} {'type': 'loss', 'content': 0.09821254014968872, 'timestamp': '2025-09-10 02:36:51.930791', 'step': 3247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:51.985486', 'step': 3247, 'epoch': 1} {'type': 'loss', 'content': 0.21097396314144135, 'timestamp': '2025-09-10 02:36:51.991828', 'step': 3248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:52.045987', 'step': 3248, 'epoch': 1} {'type': 'loss', 'content': 0.2460300177335739, 'timestamp': '2025-09-10 02:36:52.048393', 'step': 3249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:52.101960', 'step': 3249, 'epoch': 1} {'type': 'loss', 'content': 0.19580750167369843, 'timestamp': '2025-09-10 02:36:52.104112', 'step': 3250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:52.158415', 'step': 3250, 'epoch': 1} {'type': 'loss', 'content': 0.17436504364013672, 'timestamp': '2025-09-10 02:36:52.160833', 'step': 3251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:52.216475', 'step': 3251, 'epoch': 1} {'type': 'loss', 'content': 0.1546694040298462, 'timestamp': '2025-09-10 02:36:52.222940', 'step': 3252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:52.277293', 'step': 3252, 'epoch': 1} {'type': 'loss', 'content': 0.13907679915428162, 'timestamp': '2025-09-10 02:36:52.279448', 'step': 3253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:52.334207', 'step': 3253, 'epoch': 1} {'type': 'loss', 'content': 0.0987868383526802, 'timestamp': '2025-09-10 02:36:52.336356', 'step': 3254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:52.394567', 'step': 3254, 'epoch': 1} {'type': 'loss', 'content': 0.1427401453256607, 'timestamp': '2025-09-10 02:36:52.397193', 'step': 3255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:52.453084', 'step': 3255, 'epoch': 1} {'type': 'loss', 'content': 0.08324720710515976, 'timestamp': '2025-09-10 02:36:52.459739', 'step': 3256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:52.514743', 'step': 3256, 'epoch': 1} {'type': 'loss', 'content': 0.1680658906698227, 'timestamp': '2025-09-10 02:36:52.516798', 'step': 3257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:52.570875', 'step': 3257, 'epoch': 1} {'type': 'loss', 'content': 0.19848527014255524, 'timestamp': '2025-09-10 02:36:52.573021', 'step': 3258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:52.627521', 'step': 3258, 'epoch': 1} {'type': 'loss', 'content': 0.23154743015766144, 'timestamp': '2025-09-10 02:36:52.629653', 'step': 3259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:52.684462', 'step': 3259, 'epoch': 1} {'type': 'loss', 'content': 0.14072439074516296, 'timestamp': '2025-09-10 02:36:52.690750', 'step': 3260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:36:52.744417', 'step': 3260, 'epoch': 1} {'type': 'loss', 'content': 0.18686991930007935, 'timestamp': '2025-09-10 02:36:52.746647', 'step': 3261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:36:52.801509', 'step': 3261, 'epoch': 1} {'type': 'loss', 'content': 0.12250446528196335, 'timestamp': '2025-09-10 02:36:52.803734', 'step': 3262, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:37:05.906276', 'step': 3262, 'epoch': 1} {'type': 'pplx', 'content': 11369.774781985348, 'timestamp': '2025-09-10 02:37:05.909271', 'step': 3262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:05.966402', 'step': 3262, 'epoch': 1} {'type': 'loss', 'content': 0.18128640949726105, 'timestamp': '2025-09-10 02:37:05.968477', 'step': 3263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:06.026533', 'step': 3263, 'epoch': 1} {'type': 'loss', 'content': 0.19322000443935394, 'timestamp': '2025-09-10 02:37:06.033111', 'step': 3264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:06.093754', 'step': 3264, 'epoch': 1} {'type': 'loss', 'content': 0.12784788012504578, 'timestamp': '2025-09-10 02:37:06.095926', 'step': 3265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:06.154110', 'step': 3265, 'epoch': 1} {'type': 'loss', 'content': 0.2532922923564911, 'timestamp': '2025-09-10 02:37:06.156251', 'step': 3266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:06.214613', 'step': 3266, 'epoch': 1} {'type': 'loss', 'content': 0.23860596120357513, 'timestamp': '2025-09-10 02:37:06.216535', 'step': 3267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:06.278084', 'step': 3267, 'epoch': 1} {'type': 'loss', 'content': 0.13655751943588257, 'timestamp': '2025-09-10 02:37:06.284513', 'step': 3268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:06.341473', 'step': 3268, 'epoch': 1} {'type': 'loss', 'content': 0.19707848131656647, 'timestamp': '2025-09-10 02:37:06.343685', 'step': 3269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:06.399739', 'step': 3269, 'epoch': 1} {'type': 'loss', 'content': 0.19298134744167328, 'timestamp': '2025-09-10 02:37:06.401850', 'step': 3270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:06.461159', 'step': 3270, 'epoch': 1} {'type': 'loss', 'content': 0.1702038198709488, 'timestamp': '2025-09-10 02:37:06.463098', 'step': 3271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:06.519004', 'step': 3271, 'epoch': 1} {'type': 'loss', 'content': 0.08553274720907211, 'timestamp': '2025-09-10 02:37:06.524984', 'step': 3272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:06.580298', 'step': 3272, 'epoch': 1} {'type': 'loss', 'content': 0.18911486864089966, 'timestamp': '2025-09-10 02:37:06.582213', 'step': 3273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:06.638942', 'step': 3273, 'epoch': 1} {'type': 'loss', 'content': 0.2063717097043991, 'timestamp': '2025-09-10 02:37:06.640865', 'step': 3274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:06.696843', 'step': 3274, 'epoch': 1} {'type': 'loss', 'content': 0.09145600348711014, 'timestamp': '2025-09-10 02:37:06.698579', 'step': 3275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:06.754158', 'step': 3275, 'epoch': 1} {'type': 'loss', 'content': 0.1498914211988449, 'timestamp': '2025-09-10 02:37:06.760298', 'step': 3276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:06.815290', 'step': 3276, 'epoch': 1} {'type': 'loss', 'content': 0.185755655169487, 'timestamp': '2025-09-10 02:37:06.817019', 'step': 3277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:06.872062', 'step': 3277, 'epoch': 1} {'type': 'loss', 'content': 0.10180742293596268, 'timestamp': '2025-09-10 02:37:06.873931', 'step': 3278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:06.929035', 'step': 3278, 'epoch': 1} {'type': 'loss', 'content': 0.21391397714614868, 'timestamp': '2025-09-10 02:37:06.931008', 'step': 3279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:06.987441', 'step': 3279, 'epoch': 1} {'type': 'loss', 'content': 0.15394441783428192, 'timestamp': '2025-09-10 02:37:06.993550', 'step': 3280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:07.047291', 'step': 3280, 'epoch': 1} {'type': 'loss', 'content': 0.1212393045425415, 'timestamp': '2025-09-10 02:37:07.049378', 'step': 3281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:07.103917', 'step': 3281, 'epoch': 1} {'type': 'loss', 'content': 0.08086612820625305, 'timestamp': '2025-09-10 02:37:07.105902', 'step': 3282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:07.160304', 'step': 3282, 'epoch': 1} {'type': 'loss', 'content': 0.20916566252708435, 'timestamp': '2025-09-10 02:37:07.162595', 'step': 3283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:07.216818', 'step': 3283, 'epoch': 1} {'type': 'loss', 'content': 0.181152805685997, 'timestamp': '2025-09-10 02:37:07.223188', 'step': 3284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:07.277768', 'step': 3284, 'epoch': 1} {'type': 'loss', 'content': 0.2565261721611023, 'timestamp': '2025-09-10 02:37:07.281334', 'step': 3285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:07.336815', 'step': 3285, 'epoch': 1} {'type': 'loss', 'content': 0.2100062370300293, 'timestamp': '2025-09-10 02:37:07.339315', 'step': 3286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:07.393908', 'step': 3286, 'epoch': 1} {'type': 'loss', 'content': 0.13899923861026764, 'timestamp': '2025-09-10 02:37:07.396016', 'step': 3287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:07.450692', 'step': 3287, 'epoch': 1} {'type': 'loss', 'content': 0.2112337052822113, 'timestamp': '2025-09-10 02:37:07.456862', 'step': 3288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:07.514185', 'step': 3288, 'epoch': 1} {'type': 'loss', 'content': 0.20590654015541077, 'timestamp': '2025-09-10 02:37:07.516854', 'step': 3289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:07.573762', 'step': 3289, 'epoch': 1} {'type': 'loss', 'content': 0.3042148947715759, 'timestamp': '2025-09-10 02:37:07.577665', 'step': 3290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:07.634171', 'step': 3290, 'epoch': 1} {'type': 'loss', 'content': 0.17452400922775269, 'timestamp': '2025-09-10 02:37:07.636140', 'step': 3291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:07.691655', 'step': 3291, 'epoch': 1} {'type': 'loss', 'content': 0.06093311309814453, 'timestamp': '2025-09-10 02:37:07.697505', 'step': 3292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:07.750747', 'step': 3292, 'epoch': 1} {'type': 'loss', 'content': 0.3024410605430603, 'timestamp': '2025-09-10 02:37:07.752462', 'step': 3293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:07.805836', 'step': 3293, 'epoch': 1} {'type': 'loss', 'content': 0.184453547000885, 'timestamp': '2025-09-10 02:37:07.807548', 'step': 3294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:07.860009', 'step': 3294, 'epoch': 1} {'type': 'loss', 'content': 0.1434633582830429, 'timestamp': '2025-09-10 02:37:07.862125', 'step': 3295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:07.915236', 'step': 3295, 'epoch': 1} {'type': 'loss', 'content': 0.14730143547058105, 'timestamp': '2025-09-10 02:37:07.920955', 'step': 3296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:07.973332', 'step': 3296, 'epoch': 1} {'type': 'loss', 'content': 0.13688510656356812, 'timestamp': '2025-09-10 02:37:07.975046', 'step': 3297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:08.028094', 'step': 3297, 'epoch': 1} {'type': 'loss', 'content': 0.13820220530033112, 'timestamp': '2025-09-10 02:37:08.029874', 'step': 3298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:08.084396', 'step': 3298, 'epoch': 1} {'type': 'loss', 'content': 0.12748171389102936, 'timestamp': '2025-09-10 02:37:08.086292', 'step': 3299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:08.139828', 'step': 3299, 'epoch': 1} {'type': 'loss', 'content': 0.17315585911273956, 'timestamp': '2025-09-10 02:37:08.145703', 'step': 3300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:08.199786', 'step': 3300, 'epoch': 1} {'type': 'loss', 'content': 0.15617826581001282, 'timestamp': '2025-09-10 02:37:08.201561', 'step': 3301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:08.254622', 'step': 3301, 'epoch': 1} {'type': 'loss', 'content': 0.25232312083244324, 'timestamp': '2025-09-10 02:37:08.256983', 'step': 3302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:08.310267', 'step': 3302, 'epoch': 1} {'type': 'loss', 'content': 0.21929793059825897, 'timestamp': '2025-09-10 02:37:08.312007', 'step': 3303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:08.364772', 'step': 3303, 'epoch': 1} {'type': 'loss', 'content': 0.14600619673728943, 'timestamp': '2025-09-10 02:37:08.370422', 'step': 3304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:08.422932', 'step': 3304, 'epoch': 1} {'type': 'loss', 'content': 0.2685668468475342, 'timestamp': '2025-09-10 02:37:08.424903', 'step': 3305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:08.478571', 'step': 3305, 'epoch': 1} {'type': 'loss', 'content': 0.14018630981445312, 'timestamp': '2025-09-10 02:37:08.480537', 'step': 3306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:08.534907', 'step': 3306, 'epoch': 1} {'type': 'loss', 'content': 0.1730273962020874, 'timestamp': '2025-09-10 02:37:08.537009', 'step': 3307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:08.592258', 'step': 3307, 'epoch': 1} {'type': 'loss', 'content': 0.16271066665649414, 'timestamp': '2025-09-10 02:37:08.598189', 'step': 3308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:08.651085', 'step': 3308, 'epoch': 1} {'type': 'loss', 'content': 0.16141866147518158, 'timestamp': '2025-09-10 02:37:08.652946', 'step': 3309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:08.706771', 'step': 3309, 'epoch': 1} {'type': 'loss', 'content': 0.16516780853271484, 'timestamp': '2025-09-10 02:37:08.708770', 'step': 3310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:08.762403', 'step': 3310, 'epoch': 1} {'type': 'loss', 'content': 0.22790010273456573, 'timestamp': '2025-09-10 02:37:08.764308', 'step': 3311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:08.817505', 'step': 3311, 'epoch': 1} {'type': 'loss', 'content': 0.18078893423080444, 'timestamp': '2025-09-10 02:37:08.823329', 'step': 3312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:08.876223', 'step': 3312, 'epoch': 1} {'type': 'loss', 'content': 0.14064224064350128, 'timestamp': '2025-09-10 02:37:08.878176', 'step': 3313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:08.931022', 'step': 3313, 'epoch': 1} {'type': 'loss', 'content': 0.17774859070777893, 'timestamp': '2025-09-10 02:37:08.933273', 'step': 3314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:08.986837', 'step': 3314, 'epoch': 1} {'type': 'loss', 'content': 0.14472007751464844, 'timestamp': '2025-09-10 02:37:08.989020', 'step': 3315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:09.042647', 'step': 3315, 'epoch': 1} {'type': 'loss', 'content': 0.13525231182575226, 'timestamp': '2025-09-10 02:37:09.048433', 'step': 3316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:09.102062', 'step': 3316, 'epoch': 1} {'type': 'loss', 'content': 0.13597163558006287, 'timestamp': '2025-09-10 02:37:09.104181', 'step': 3317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:09.163995', 'step': 3317, 'epoch': 1} {'type': 'loss', 'content': 0.15371015667915344, 'timestamp': '2025-09-10 02:37:09.165981', 'step': 3318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:09.219993', 'step': 3318, 'epoch': 1} {'type': 'loss', 'content': 0.18050514161586761, 'timestamp': '2025-09-10 02:37:09.222028', 'step': 3319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:09.278001', 'step': 3319, 'epoch': 1} {'type': 'loss', 'content': 0.13387911021709442, 'timestamp': '2025-09-10 02:37:09.283872', 'step': 3320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:09.336483', 'step': 3320, 'epoch': 1} {'type': 'loss', 'content': 0.21739353239536285, 'timestamp': '2025-09-10 02:37:09.338494', 'step': 3321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:09.393253', 'step': 3321, 'epoch': 1} {'type': 'loss', 'content': 0.11250811070203781, 'timestamp': '2025-09-10 02:37:09.395426', 'step': 3322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:09.449121', 'step': 3322, 'epoch': 1} {'type': 'loss', 'content': 0.10827266424894333, 'timestamp': '2025-09-10 02:37:09.451187', 'step': 3323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:09.503898', 'step': 3323, 'epoch': 1} {'type': 'loss', 'content': 0.14835268259048462, 'timestamp': '2025-09-10 02:37:09.509741', 'step': 3324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:09.562228', 'step': 3324, 'epoch': 1} {'type': 'loss', 'content': 0.3068135678768158, 'timestamp': '2025-09-10 02:37:09.563938', 'step': 3325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:09.617362', 'step': 3325, 'epoch': 1} {'type': 'loss', 'content': 0.18224871158599854, 'timestamp': '2025-09-10 02:37:09.619220', 'step': 3326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:09.672826', 'step': 3326, 'epoch': 1} {'type': 'loss', 'content': 0.23977428674697876, 'timestamp': '2025-09-10 02:37:09.674615', 'step': 3327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:09.727866', 'step': 3327, 'epoch': 1} {'type': 'loss', 'content': 0.19780702888965607, 'timestamp': '2025-09-10 02:37:09.733970', 'step': 3328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:09.786877', 'step': 3328, 'epoch': 1} {'type': 'loss', 'content': 0.2157621681690216, 'timestamp': '2025-09-10 02:37:09.788999', 'step': 3329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:09.842588', 'step': 3329, 'epoch': 1} {'type': 'loss', 'content': 0.15308550000190735, 'timestamp': '2025-09-10 02:37:09.844618', 'step': 3330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:09.897940', 'step': 3330, 'epoch': 1} {'type': 'loss', 'content': 0.09704054147005081, 'timestamp': '2025-09-10 02:37:09.900003', 'step': 3331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:09.953308', 'step': 3331, 'epoch': 1} {'type': 'loss', 'content': 0.19815510511398315, 'timestamp': '2025-09-10 02:37:09.959155', 'step': 3332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:10.012036', 'step': 3332, 'epoch': 1} {'type': 'loss', 'content': 0.12097311019897461, 'timestamp': '2025-09-10 02:37:10.014014', 'step': 3333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:10.067439', 'step': 3333, 'epoch': 1} {'type': 'loss', 'content': 0.21763916313648224, 'timestamp': '2025-09-10 02:37:10.069502', 'step': 3334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:10.123274', 'step': 3334, 'epoch': 1} {'type': 'loss', 'content': 0.19225074350833893, 'timestamp': '2025-09-10 02:37:10.125243', 'step': 3335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:10.179309', 'step': 3335, 'epoch': 1} {'type': 'loss', 'content': 0.17779655754566193, 'timestamp': '2025-09-10 02:37:10.185356', 'step': 3336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:10.238490', 'step': 3336, 'epoch': 1} {'type': 'loss', 'content': 0.12059081345796585, 'timestamp': '2025-09-10 02:37:10.240556', 'step': 3337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:10.294431', 'step': 3337, 'epoch': 1} {'type': 'loss', 'content': 0.14612089097499847, 'timestamp': '2025-09-10 02:37:10.296483', 'step': 3338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:10.350899', 'step': 3338, 'epoch': 1} {'type': 'loss', 'content': 0.27775874733924866, 'timestamp': '2025-09-10 02:37:10.352970', 'step': 3339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:37:10.423699', 'step': 3339, 'epoch': 1} {'type': 'loss', 'content': 0.2024090439081192, 'timestamp': '2025-09-10 02:37:10.436994', 'step': 3340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:10.490701', 'step': 3340, 'epoch': 1} {'type': 'loss', 'content': 0.2344280481338501, 'timestamp': '2025-09-10 02:37:10.492632', 'step': 3341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:10.546322', 'step': 3341, 'epoch': 1} {'type': 'loss', 'content': 0.25891751050949097, 'timestamp': '2025-09-10 02:37:10.548457', 'step': 3342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:10.602545', 'step': 3342, 'epoch': 1} {'type': 'loss', 'content': 0.17503947019577026, 'timestamp': '2025-09-10 02:37:10.604619', 'step': 3343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:10.658327', 'step': 3343, 'epoch': 1} {'type': 'loss', 'content': 0.22712594270706177, 'timestamp': '2025-09-10 02:37:10.664410', 'step': 3344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:10.718746', 'step': 3344, 'epoch': 1} {'type': 'loss', 'content': 0.17755448818206787, 'timestamp': '2025-09-10 02:37:10.720717', 'step': 3345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:10.774451', 'step': 3345, 'epoch': 1} {'type': 'loss', 'content': 0.18759770691394806, 'timestamp': '2025-09-10 02:37:10.776413', 'step': 3346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:10.831441', 'step': 3346, 'epoch': 1} {'type': 'loss', 'content': 0.19691632688045502, 'timestamp': '2025-09-10 02:37:10.833487', 'step': 3347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:10.887220', 'step': 3347, 'epoch': 1} {'type': 'loss', 'content': 0.08351675420999527, 'timestamp': '2025-09-10 02:37:10.893114', 'step': 3348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:10.947430', 'step': 3348, 'epoch': 1} {'type': 'loss', 'content': 0.2892800271511078, 'timestamp': '2025-09-10 02:37:10.949639', 'step': 3349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:11.002185', 'step': 3349, 'epoch': 1} {'type': 'loss', 'content': 0.1960253268480301, 'timestamp': '2025-09-10 02:37:11.004092', 'step': 3350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:11.057851', 'step': 3350, 'epoch': 1} {'type': 'loss', 'content': 0.12695135176181793, 'timestamp': '2025-09-10 02:37:11.059953', 'step': 3351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:11.113780', 'step': 3351, 'epoch': 1} {'type': 'loss', 'content': 0.16053397953510284, 'timestamp': '2025-09-10 02:37:11.119752', 'step': 3352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:11.171699', 'step': 3352, 'epoch': 1} {'type': 'loss', 'content': 0.1335095465183258, 'timestamp': '2025-09-10 02:37:11.173738', 'step': 3353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:11.226671', 'step': 3353, 'epoch': 1} {'type': 'loss', 'content': 0.13947539031505585, 'timestamp': '2025-09-10 02:37:11.228608', 'step': 3354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:11.282316', 'step': 3354, 'epoch': 1} {'type': 'loss', 'content': 0.24397623538970947, 'timestamp': '2025-09-10 02:37:11.284297', 'step': 3355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:11.337618', 'step': 3355, 'epoch': 1} {'type': 'loss', 'content': 0.1942884624004364, 'timestamp': '2025-09-10 02:37:11.343536', 'step': 3356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:11.396146', 'step': 3356, 'epoch': 1} {'type': 'loss', 'content': 0.1416698694229126, 'timestamp': '2025-09-10 02:37:11.398371', 'step': 3357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:11.451894', 'step': 3357, 'epoch': 1} {'type': 'loss', 'content': 0.15234677493572235, 'timestamp': '2025-09-10 02:37:11.454120', 'step': 3358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:11.507420', 'step': 3358, 'epoch': 1} {'type': 'loss', 'content': 0.10199075937271118, 'timestamp': '2025-09-10 02:37:11.509442', 'step': 3359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:11.563152', 'step': 3359, 'epoch': 1} {'type': 'loss', 'content': 0.20567956566810608, 'timestamp': '2025-09-10 02:37:11.569045', 'step': 3360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:11.622171', 'step': 3360, 'epoch': 1} {'type': 'loss', 'content': 0.17318469285964966, 'timestamp': '2025-09-10 02:37:11.624129', 'step': 3361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:11.676835', 'step': 3361, 'epoch': 1} {'type': 'loss', 'content': 0.1748015582561493, 'timestamp': '2025-09-10 02:37:11.678790', 'step': 3362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:11.731754', 'step': 3362, 'epoch': 1} {'type': 'loss', 'content': 0.11395399272441864, 'timestamp': '2025-09-10 02:37:11.733658', 'step': 3363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:11.788176', 'step': 3363, 'epoch': 1} {'type': 'loss', 'content': 0.16711993515491486, 'timestamp': '2025-09-10 02:37:11.794407', 'step': 3364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:11.850994', 'step': 3364, 'epoch': 1} {'type': 'loss', 'content': 0.28336095809936523, 'timestamp': '2025-09-10 02:37:11.852968', 'step': 3365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:11.906698', 'step': 3365, 'epoch': 1} {'type': 'loss', 'content': 0.11598625034093857, 'timestamp': '2025-09-10 02:37:11.908743', 'step': 3366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:11.962386', 'step': 3366, 'epoch': 1} {'type': 'loss', 'content': 0.18593299388885498, 'timestamp': '2025-09-10 02:37:11.964615', 'step': 3367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:12.019413', 'step': 3367, 'epoch': 1} {'type': 'loss', 'content': 0.17103810608386993, 'timestamp': '2025-09-10 02:37:12.025371', 'step': 3368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:12.078609', 'step': 3368, 'epoch': 1} {'type': 'loss', 'content': 0.23256440460681915, 'timestamp': '2025-09-10 02:37:12.080667', 'step': 3369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:12.133948', 'step': 3369, 'epoch': 1} {'type': 'loss', 'content': 0.1320011168718338, 'timestamp': '2025-09-10 02:37:12.136089', 'step': 3370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:12.191328', 'step': 3370, 'epoch': 1} {'type': 'loss', 'content': 0.23187261819839478, 'timestamp': '2025-09-10 02:37:12.193674', 'step': 3371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:12.250143', 'step': 3371, 'epoch': 1} {'type': 'loss', 'content': 0.15189912915229797, 'timestamp': '2025-09-10 02:37:12.257059', 'step': 3372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:12.314357', 'step': 3372, 'epoch': 1} {'type': 'loss', 'content': 0.13872312009334564, 'timestamp': '2025-09-10 02:37:12.316720', 'step': 3373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:12.373484', 'step': 3373, 'epoch': 1} {'type': 'loss', 'content': 0.16166727244853973, 'timestamp': '2025-09-10 02:37:12.375565', 'step': 3374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:12.430897', 'step': 3374, 'epoch': 1} {'type': 'loss', 'content': 0.20155736804008484, 'timestamp': '2025-09-10 02:37:12.432788', 'step': 3375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:12.486822', 'step': 3375, 'epoch': 1} {'type': 'loss', 'content': 0.2634409964084625, 'timestamp': '2025-09-10 02:37:12.492801', 'step': 3376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:12.545628', 'step': 3376, 'epoch': 1} {'type': 'loss', 'content': 0.24797189235687256, 'timestamp': '2025-09-10 02:37:12.548264', 'step': 3377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:12.600903', 'step': 3377, 'epoch': 1} {'type': 'loss', 'content': 0.1780920922756195, 'timestamp': '2025-09-10 02:37:12.602925', 'step': 3378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:12.657956', 'step': 3378, 'epoch': 1} {'type': 'loss', 'content': 0.146745502948761, 'timestamp': '2025-09-10 02:37:12.659902', 'step': 3379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:12.713140', 'step': 3379, 'epoch': 1} {'type': 'loss', 'content': 0.18678441643714905, 'timestamp': '2025-09-10 02:37:12.718736', 'step': 3380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:12.771014', 'step': 3380, 'epoch': 1} {'type': 'loss', 'content': 0.18542911112308502, 'timestamp': '2025-09-10 02:37:12.772952', 'step': 3381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:12.826673', 'step': 3381, 'epoch': 1} {'type': 'loss', 'content': 0.22394230961799622, 'timestamp': '2025-09-10 02:37:12.828634', 'step': 3382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:12.883218', 'step': 3382, 'epoch': 1} {'type': 'loss', 'content': 0.19782288372516632, 'timestamp': '2025-09-10 02:37:12.885132', 'step': 3383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:12.938077', 'step': 3383, 'epoch': 1} {'type': 'loss', 'content': 0.1503574401140213, 'timestamp': '2025-09-10 02:37:12.943905', 'step': 3384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:12.996894', 'step': 3384, 'epoch': 1} {'type': 'loss', 'content': 0.20533029735088348, 'timestamp': '2025-09-10 02:37:12.998657', 'step': 3385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:13.053810', 'step': 3385, 'epoch': 1} {'type': 'loss', 'content': 0.19023531675338745, 'timestamp': '2025-09-10 02:37:13.055738', 'step': 3386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:13.108858', 'step': 3386, 'epoch': 1} {'type': 'loss', 'content': 0.12379620969295502, 'timestamp': '2025-09-10 02:37:13.110774', 'step': 3387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:13.164906', 'step': 3387, 'epoch': 1} {'type': 'loss', 'content': 0.10803351551294327, 'timestamp': '2025-09-10 02:37:13.170483', 'step': 3388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:13.222957', 'step': 3388, 'epoch': 1} {'type': 'loss', 'content': 0.1451396346092224, 'timestamp': '2025-09-10 02:37:13.224673', 'step': 3389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:13.277362', 'step': 3389, 'epoch': 1} {'type': 'loss', 'content': 0.22581224143505096, 'timestamp': '2025-09-10 02:37:13.279315', 'step': 3390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:13.332843', 'step': 3390, 'epoch': 1} {'type': 'loss', 'content': 0.16674306988716125, 'timestamp': '2025-09-10 02:37:13.334764', 'step': 3391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:13.388341', 'step': 3391, 'epoch': 1} {'type': 'loss', 'content': 0.1854776293039322, 'timestamp': '2025-09-10 02:37:13.394470', 'step': 3392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:13.447478', 'step': 3392, 'epoch': 1} {'type': 'loss', 'content': 0.12205434590578079, 'timestamp': '2025-09-10 02:37:13.449668', 'step': 3393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:13.502983', 'step': 3393, 'epoch': 1} {'type': 'loss', 'content': 0.1279243677854538, 'timestamp': '2025-09-10 02:37:13.504839', 'step': 3394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:13.557849', 'step': 3394, 'epoch': 1} {'type': 'loss', 'content': 0.11259549111127853, 'timestamp': '2025-09-10 02:37:13.559575', 'step': 3395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:13.612560', 'step': 3395, 'epoch': 1} {'type': 'loss', 'content': 0.20864929258823395, 'timestamp': '2025-09-10 02:37:13.618152', 'step': 3396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:13.671907', 'step': 3396, 'epoch': 1} {'type': 'loss', 'content': 0.17444343864917755, 'timestamp': '2025-09-10 02:37:13.673630', 'step': 3397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:13.726136', 'step': 3397, 'epoch': 1} {'type': 'loss', 'content': 0.19737905263900757, 'timestamp': '2025-09-10 02:37:13.728082', 'step': 3398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:13.781330', 'step': 3398, 'epoch': 1} {'type': 'loss', 'content': 0.20448119938373566, 'timestamp': '2025-09-10 02:37:13.783303', 'step': 3399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:13.836411', 'step': 3399, 'epoch': 1} {'type': 'loss', 'content': 0.1489998698234558, 'timestamp': '2025-09-10 02:37:13.842251', 'step': 3400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:13.895034', 'step': 3400, 'epoch': 1} {'type': 'loss', 'content': 0.15042129158973694, 'timestamp': '2025-09-10 02:37:13.897201', 'step': 3401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:13.951522', 'step': 3401, 'epoch': 1} {'type': 'loss', 'content': 0.12481903284788132, 'timestamp': '2025-09-10 02:37:13.953533', 'step': 3402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:14.007380', 'step': 3402, 'epoch': 1} {'type': 'loss', 'content': 0.28028905391693115, 'timestamp': '2025-09-10 02:37:14.009183', 'step': 3403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:14.062664', 'step': 3403, 'epoch': 1} {'type': 'loss', 'content': 0.1970740556716919, 'timestamp': '2025-09-10 02:37:14.068220', 'step': 3404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:14.120530', 'step': 3404, 'epoch': 1} {'type': 'loss', 'content': 0.18017081916332245, 'timestamp': '2025-09-10 02:37:14.122194', 'step': 3405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:14.175764', 'step': 3405, 'epoch': 1} {'type': 'loss', 'content': 0.130229189991951, 'timestamp': '2025-09-10 02:37:14.177476', 'step': 3406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:14.231430', 'step': 3406, 'epoch': 1} {'type': 'loss', 'content': 0.18910826742649078, 'timestamp': '2025-09-10 02:37:14.233339', 'step': 3407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:14.286055', 'step': 3407, 'epoch': 1} {'type': 'loss', 'content': 0.22142009437084198, 'timestamp': '2025-09-10 02:37:14.291890', 'step': 3408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:14.344361', 'step': 3408, 'epoch': 1} {'type': 'loss', 'content': 0.11482921242713928, 'timestamp': '2025-09-10 02:37:14.346348', 'step': 3409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:14.399224', 'step': 3409, 'epoch': 1} {'type': 'loss', 'content': 0.15795356035232544, 'timestamp': '2025-09-10 02:37:14.401170', 'step': 3410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:14.454364', 'step': 3410, 'epoch': 1} {'type': 'loss', 'content': 0.06426558643579483, 'timestamp': '2025-09-10 02:37:14.456371', 'step': 3411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:14.510180', 'step': 3411, 'epoch': 1} {'type': 'loss', 'content': 0.27211883664131165, 'timestamp': '2025-09-10 02:37:14.516417', 'step': 3412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:14.569885', 'step': 3412, 'epoch': 1} {'type': 'loss', 'content': 0.1885351687669754, 'timestamp': '2025-09-10 02:37:14.572125', 'step': 3413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:14.629737', 'step': 3413, 'epoch': 1} {'type': 'loss', 'content': 0.1473199725151062, 'timestamp': '2025-09-10 02:37:14.632404', 'step': 3414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:14.688207', 'step': 3414, 'epoch': 1} {'type': 'loss', 'content': 0.12651893496513367, 'timestamp': '2025-09-10 02:37:14.692423', 'step': 3415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:14.748015', 'step': 3415, 'epoch': 1} {'type': 'loss', 'content': 0.18166133761405945, 'timestamp': '2025-09-10 02:37:14.754245', 'step': 3416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:14.806904', 'step': 3416, 'epoch': 1} {'type': 'loss', 'content': 0.1464003622531891, 'timestamp': '2025-09-10 02:37:14.809028', 'step': 3417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:14.862517', 'step': 3417, 'epoch': 1} {'type': 'loss', 'content': 0.13405680656433105, 'timestamp': '2025-09-10 02:37:14.864614', 'step': 3418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:14.917777', 'step': 3418, 'epoch': 1} {'type': 'loss', 'content': 0.1413172483444214, 'timestamp': '2025-09-10 02:37:14.919745', 'step': 3419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:14.973153', 'step': 3419, 'epoch': 1} {'type': 'loss', 'content': 0.08894824236631393, 'timestamp': '2025-09-10 02:37:14.979008', 'step': 3420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:15.033448', 'step': 3420, 'epoch': 1} {'type': 'loss', 'content': 0.22479842603206635, 'timestamp': '2025-09-10 02:37:15.035556', 'step': 3421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:15.089850', 'step': 3421, 'epoch': 1} {'type': 'loss', 'content': 0.1427236944437027, 'timestamp': '2025-09-10 02:37:15.091822', 'step': 3422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:15.145555', 'step': 3422, 'epoch': 1} {'type': 'loss', 'content': 0.16247721016407013, 'timestamp': '2025-09-10 02:37:15.147407', 'step': 3423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:15.200701', 'step': 3423, 'epoch': 1} {'type': 'loss', 'content': 0.09911613911390305, 'timestamp': '2025-09-10 02:37:15.206777', 'step': 3424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:15.260265', 'step': 3424, 'epoch': 1} {'type': 'loss', 'content': 0.11656557023525238, 'timestamp': '2025-09-10 02:37:15.262491', 'step': 3425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:15.315613', 'step': 3425, 'epoch': 1} {'type': 'loss', 'content': 0.1595323383808136, 'timestamp': '2025-09-10 02:37:15.317794', 'step': 3426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:15.371362', 'step': 3426, 'epoch': 1} {'type': 'loss', 'content': 0.25445595383644104, 'timestamp': '2025-09-10 02:37:15.373480', 'step': 3427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:15.426553', 'step': 3427, 'epoch': 1} {'type': 'loss', 'content': 0.26469019055366516, 'timestamp': '2025-09-10 02:37:15.432413', 'step': 3428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:15.484869', 'step': 3428, 'epoch': 1} {'type': 'loss', 'content': 0.20810426771640778, 'timestamp': '2025-09-10 02:37:15.486854', 'step': 3429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:15.541473', 'step': 3429, 'epoch': 1} {'type': 'loss', 'content': 0.2621966004371643, 'timestamp': '2025-09-10 02:37:15.543325', 'step': 3430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:15.597878', 'step': 3430, 'epoch': 1} {'type': 'loss', 'content': 0.1705455631017685, 'timestamp': '2025-09-10 02:37:15.599582', 'step': 3431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:15.652869', 'step': 3431, 'epoch': 1} {'type': 'loss', 'content': 0.1708097904920578, 'timestamp': '2025-09-10 02:37:15.658835', 'step': 3432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:15.712396', 'step': 3432, 'epoch': 1} {'type': 'loss', 'content': 0.09308375418186188, 'timestamp': '2025-09-10 02:37:15.714089', 'step': 3433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:15.767297', 'step': 3433, 'epoch': 1} {'type': 'loss', 'content': 0.1850799024105072, 'timestamp': '2025-09-10 02:37:15.769313', 'step': 3434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:15.822903', 'step': 3434, 'epoch': 1} {'type': 'loss', 'content': 0.1873360425233841, 'timestamp': '2025-09-10 02:37:15.826067', 'step': 3435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:15.879662', 'step': 3435, 'epoch': 1} {'type': 'loss', 'content': 0.27591052651405334, 'timestamp': '2025-09-10 02:37:15.885523', 'step': 3436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:15.937840', 'step': 3436, 'epoch': 1} {'type': 'loss', 'content': 0.13343988358974457, 'timestamp': '2025-09-10 02:37:15.939806', 'step': 3437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:15.993232', 'step': 3437, 'epoch': 1} {'type': 'loss', 'content': 0.25193461775779724, 'timestamp': '2025-09-10 02:37:15.994934', 'step': 3438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:16.048899', 'step': 3438, 'epoch': 1} {'type': 'loss', 'content': 0.1277654469013214, 'timestamp': '2025-09-10 02:37:16.050797', 'step': 3439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:16.105485', 'step': 3439, 'epoch': 1} {'type': 'loss', 'content': 0.18187230825424194, 'timestamp': '2025-09-10 02:37:16.114541', 'step': 3440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:16.170458', 'step': 3440, 'epoch': 1} {'type': 'loss', 'content': 0.16059637069702148, 'timestamp': '2025-09-10 02:37:16.172371', 'step': 3441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:16.230862', 'step': 3441, 'epoch': 1} {'type': 'loss', 'content': 0.19229057431221008, 'timestamp': '2025-09-10 02:37:16.236203', 'step': 3442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:16.292383', 'step': 3442, 'epoch': 1} {'type': 'loss', 'content': 0.1681266725063324, 'timestamp': '2025-09-10 02:37:16.294685', 'step': 3443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:16.348163', 'step': 3443, 'epoch': 1} {'type': 'loss', 'content': 0.13119731843471527, 'timestamp': '2025-09-10 02:37:16.354303', 'step': 3444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:16.418136', 'step': 3444, 'epoch': 1} {'type': 'loss', 'content': 0.20468270778656006, 'timestamp': '2025-09-10 02:37:16.420176', 'step': 3445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:16.472688', 'step': 3445, 'epoch': 1} {'type': 'loss', 'content': 0.16115917265415192, 'timestamp': '2025-09-10 02:37:16.474420', 'step': 3446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:16.528020', 'step': 3446, 'epoch': 1} {'type': 'loss', 'content': 0.16638538241386414, 'timestamp': '2025-09-10 02:37:16.529826', 'step': 3447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:16.583700', 'step': 3447, 'epoch': 1} {'type': 'loss', 'content': 0.21143406629562378, 'timestamp': '2025-09-10 02:37:16.595073', 'step': 3448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:16.649403', 'step': 3448, 'epoch': 1} {'type': 'loss', 'content': 0.14032334089279175, 'timestamp': '2025-09-10 02:37:16.651183', 'step': 3449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:16.708743', 'step': 3449, 'epoch': 1} {'type': 'loss', 'content': 0.16833198070526123, 'timestamp': '2025-09-10 02:37:16.712171', 'step': 3450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:16.767522', 'step': 3450, 'epoch': 1} {'type': 'loss', 'content': 0.1961943656206131, 'timestamp': '2025-09-10 02:37:16.769505', 'step': 3451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:16.824071', 'step': 3451, 'epoch': 1} {'type': 'loss', 'content': 0.19829846918582916, 'timestamp': '2025-09-10 02:37:16.829983', 'step': 3452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:16.882468', 'step': 3452, 'epoch': 1} {'type': 'loss', 'content': 0.18784014880657196, 'timestamp': '2025-09-10 02:37:16.884471', 'step': 3453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:16.941132', 'step': 3453, 'epoch': 1} {'type': 'loss', 'content': 0.18932446837425232, 'timestamp': '2025-09-10 02:37:16.942878', 'step': 3454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:16.996291', 'step': 3454, 'epoch': 1} {'type': 'loss', 'content': 0.13505621254444122, 'timestamp': '2025-09-10 02:37:16.997956', 'step': 3455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:17.051327', 'step': 3455, 'epoch': 1} {'type': 'loss', 'content': 0.1989893764257431, 'timestamp': '2025-09-10 02:37:17.058957', 'step': 3456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:17.113194', 'step': 3456, 'epoch': 1} {'type': 'loss', 'content': 0.08326324820518494, 'timestamp': '2025-09-10 02:37:17.115129', 'step': 3457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:17.172388', 'step': 3457, 'epoch': 1} {'type': 'loss', 'content': 0.1442999243736267, 'timestamp': '2025-09-10 02:37:17.174506', 'step': 3458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:17.228771', 'step': 3458, 'epoch': 1} {'type': 'loss', 'content': 0.2108585089445114, 'timestamp': '2025-09-10 02:37:17.232195', 'step': 3459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:17.286513', 'step': 3459, 'epoch': 1} {'type': 'loss', 'content': 0.14997559785842896, 'timestamp': '2025-09-10 02:37:17.292493', 'step': 3460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:17.345386', 'step': 3460, 'epoch': 1} {'type': 'loss', 'content': 0.1404440701007843, 'timestamp': '2025-09-10 02:37:17.366122', 'step': 3461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:17.428357', 'step': 3461, 'epoch': 1} {'type': 'loss', 'content': 0.10756867378950119, 'timestamp': '2025-09-10 02:37:17.430033', 'step': 3462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:17.483188', 'step': 3462, 'epoch': 1} {'type': 'loss', 'content': 0.14660245180130005, 'timestamp': '2025-09-10 02:37:17.485338', 'step': 3463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:17.540535', 'step': 3463, 'epoch': 1} {'type': 'loss', 'content': 0.16138719022274017, 'timestamp': '2025-09-10 02:37:17.546501', 'step': 3464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:17.600168', 'step': 3464, 'epoch': 1} {'type': 'loss', 'content': 0.17744627594947815, 'timestamp': '2025-09-10 02:37:17.603690', 'step': 3465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:17.658395', 'step': 3465, 'epoch': 1} {'type': 'loss', 'content': 0.10873587429523468, 'timestamp': '2025-09-10 02:37:17.660408', 'step': 3466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:17.715236', 'step': 3466, 'epoch': 1} {'type': 'loss', 'content': 0.19608694314956665, 'timestamp': '2025-09-10 02:37:17.717129', 'step': 3467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:17.775787', 'step': 3467, 'epoch': 1} {'type': 'loss', 'content': 0.1602257490158081, 'timestamp': '2025-09-10 02:37:17.781761', 'step': 3468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:17.835990', 'step': 3468, 'epoch': 1} {'type': 'loss', 'content': 0.1544683426618576, 'timestamp': '2025-09-10 02:37:17.838190', 'step': 3469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:17.891031', 'step': 3469, 'epoch': 1} {'type': 'loss', 'content': 0.18298852443695068, 'timestamp': '2025-09-10 02:37:17.893020', 'step': 3470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:17.947131', 'step': 3470, 'epoch': 1} {'type': 'loss', 'content': 0.22046217322349548, 'timestamp': '2025-09-10 02:37:17.949118', 'step': 3471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:18.002502', 'step': 3471, 'epoch': 1} {'type': 'loss', 'content': 0.15682734549045563, 'timestamp': '2025-09-10 02:37:18.008404', 'step': 3472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:18.061138', 'step': 3472, 'epoch': 1} {'type': 'loss', 'content': 0.1528797447681427, 'timestamp': '2025-09-10 02:37:18.063367', 'step': 3473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:18.116737', 'step': 3473, 'epoch': 1} {'type': 'loss', 'content': 0.1837635189294815, 'timestamp': '2025-09-10 02:37:18.118814', 'step': 3474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:18.172126', 'step': 3474, 'epoch': 1} {'type': 'loss', 'content': 0.15032660961151123, 'timestamp': '2025-09-10 02:37:18.174058', 'step': 3475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:18.228066', 'step': 3475, 'epoch': 1} {'type': 'loss', 'content': 0.2669604420661926, 'timestamp': '2025-09-10 02:37:18.234312', 'step': 3476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:18.286884', 'step': 3476, 'epoch': 1} {'type': 'loss', 'content': 0.14269685745239258, 'timestamp': '2025-09-10 02:37:18.289073', 'step': 3477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:18.342707', 'step': 3477, 'epoch': 1} {'type': 'loss', 'content': 0.1493959128856659, 'timestamp': '2025-09-10 02:37:18.344689', 'step': 3478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:18.397661', 'step': 3478, 'epoch': 1} {'type': 'loss', 'content': 0.15041841566562653, 'timestamp': '2025-09-10 02:37:18.399609', 'step': 3479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:18.452805', 'step': 3479, 'epoch': 1} {'type': 'loss', 'content': 0.19983075559139252, 'timestamp': '2025-09-10 02:37:18.458652', 'step': 3480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:18.511245', 'step': 3480, 'epoch': 1} {'type': 'loss', 'content': 0.15427400171756744, 'timestamp': '2025-09-10 02:37:18.513133', 'step': 3481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:18.565865', 'step': 3481, 'epoch': 1} {'type': 'loss', 'content': 0.2372148483991623, 'timestamp': '2025-09-10 02:37:18.567858', 'step': 3482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:18.621304', 'step': 3482, 'epoch': 1} {'type': 'loss', 'content': 0.21536223590373993, 'timestamp': '2025-09-10 02:37:18.623353', 'step': 3483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:18.676870', 'step': 3483, 'epoch': 1} {'type': 'loss', 'content': 0.17512935400009155, 'timestamp': '2025-09-10 02:37:18.682690', 'step': 3484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:18.736353', 'step': 3484, 'epoch': 1} {'type': 'loss', 'content': 0.15588396787643433, 'timestamp': '2025-09-10 02:37:18.738320', 'step': 3485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:18.792052', 'step': 3485, 'epoch': 1} {'type': 'loss', 'content': 0.11801696568727493, 'timestamp': '2025-09-10 02:37:18.794083', 'step': 3486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:18.848437', 'step': 3486, 'epoch': 1} {'type': 'loss', 'content': 0.18169529736042023, 'timestamp': '2025-09-10 02:37:18.850527', 'step': 3487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:18.904104', 'step': 3487, 'epoch': 1} {'type': 'loss', 'content': 0.13502100110054016, 'timestamp': '2025-09-10 02:37:18.910064', 'step': 3488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:18.962855', 'step': 3488, 'epoch': 1} {'type': 'loss', 'content': 0.20971782505512238, 'timestamp': '2025-09-10 02:37:18.964784', 'step': 3489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:19.017750', 'step': 3489, 'epoch': 1} {'type': 'loss', 'content': 0.1686561554670334, 'timestamp': '2025-09-10 02:37:19.019557', 'step': 3490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:19.073394', 'step': 3490, 'epoch': 1} {'type': 'loss', 'content': 0.2197553515434265, 'timestamp': '2025-09-10 02:37:19.075302', 'step': 3491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:19.128844', 'step': 3491, 'epoch': 1} {'type': 'loss', 'content': 0.14798039197921753, 'timestamp': '2025-09-10 02:37:19.134737', 'step': 3492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:19.187468', 'step': 3492, 'epoch': 1} {'type': 'loss', 'content': 0.0817207545042038, 'timestamp': '2025-09-10 02:37:19.189393', 'step': 3493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:19.243013', 'step': 3493, 'epoch': 1} {'type': 'loss', 'content': 0.228753924369812, 'timestamp': '2025-09-10 02:37:19.245021', 'step': 3494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:19.299695', 'step': 3494, 'epoch': 1} {'type': 'loss', 'content': 0.13451118767261505, 'timestamp': '2025-09-10 02:37:19.301733', 'step': 3495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:19.355002', 'step': 3495, 'epoch': 1} {'type': 'loss', 'content': 0.34324944019317627, 'timestamp': '2025-09-10 02:37:19.360946', 'step': 3496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:19.413580', 'step': 3496, 'epoch': 1} {'type': 'loss', 'content': 0.09965278953313828, 'timestamp': '2025-09-10 02:37:19.416550', 'step': 3497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:19.470519', 'step': 3497, 'epoch': 1} {'type': 'loss', 'content': 0.1945173293352127, 'timestamp': '2025-09-10 02:37:19.473088', 'step': 3498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:19.527162', 'step': 3498, 'epoch': 1} {'type': 'loss', 'content': 0.12864556908607483, 'timestamp': '2025-09-10 02:37:19.529090', 'step': 3499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:19.584006', 'step': 3499, 'epoch': 1} {'type': 'loss', 'content': 0.1253725290298462, 'timestamp': '2025-09-10 02:37:19.589831', 'step': 3500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 3500', 'timestamp': '2025-09-10 02:37:20.039848', 'step': 3500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:20.096760', 'step': 3500, 'epoch': 1} {'type': 'loss', 'content': 0.21481625735759735, 'timestamp': '2025-09-10 02:37:20.098925', 'step': 3501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:20.157404', 'step': 3501, 'epoch': 1} {'type': 'loss', 'content': 0.12022287398576736, 'timestamp': '2025-09-10 02:37:20.159672', 'step': 3502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:20.217021', 'step': 3502, 'epoch': 1} {'type': 'loss', 'content': 0.10863727331161499, 'timestamp': '2025-09-10 02:37:20.219177', 'step': 3503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:20.288938', 'step': 3503, 'epoch': 1} {'type': 'loss', 'content': 0.1886061280965805, 'timestamp': '2025-09-10 02:37:20.295016', 'step': 3504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:20.351803', 'step': 3504, 'epoch': 1} {'type': 'loss', 'content': 0.1504441201686859, 'timestamp': '2025-09-10 02:37:20.353737', 'step': 3505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:20.408345', 'step': 3505, 'epoch': 1} {'type': 'loss', 'content': 0.1483093500137329, 'timestamp': '2025-09-10 02:37:20.410298', 'step': 3506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:20.465217', 'step': 3506, 'epoch': 1} {'type': 'loss', 'content': 0.18370425701141357, 'timestamp': '2025-09-10 02:37:20.467202', 'step': 3507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:20.523315', 'step': 3507, 'epoch': 1} {'type': 'loss', 'content': 0.16182689368724823, 'timestamp': '2025-09-10 02:37:20.529550', 'step': 3508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:20.590600', 'step': 3508, 'epoch': 1} {'type': 'loss', 'content': 0.17522688210010529, 'timestamp': '2025-09-10 02:37:20.592840', 'step': 3509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:20.654763', 'step': 3509, 'epoch': 1} {'type': 'loss', 'content': 0.20164895057678223, 'timestamp': '2025-09-10 02:37:20.662551', 'step': 3510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:20.719435', 'step': 3510, 'epoch': 1} {'type': 'loss', 'content': 0.24178382754325867, 'timestamp': '2025-09-10 02:37:20.722244', 'step': 3511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:20.780915', 'step': 3511, 'epoch': 1} {'type': 'loss', 'content': 0.15224947035312653, 'timestamp': '2025-09-10 02:37:20.787895', 'step': 3512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:20.861446', 'step': 3512, 'epoch': 1} {'type': 'loss', 'content': 0.17941157519817352, 'timestamp': '2025-09-10 02:37:20.863594', 'step': 3513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:20.931879', 'step': 3513, 'epoch': 1} {'type': 'loss', 'content': 0.17753277719020844, 'timestamp': '2025-09-10 02:37:20.935854', 'step': 3514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:21.002381', 'step': 3514, 'epoch': 1} {'type': 'loss', 'content': 0.304063081741333, 'timestamp': '2025-09-10 02:37:21.004684', 'step': 3515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:21.065767', 'step': 3515, 'epoch': 1} {'type': 'loss', 'content': 0.15555675327777863, 'timestamp': '2025-09-10 02:37:21.073784', 'step': 3516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:21.135416', 'step': 3516, 'epoch': 1} {'type': 'loss', 'content': 0.15793493390083313, 'timestamp': '2025-09-10 02:37:21.137657', 'step': 3517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:21.195428', 'step': 3517, 'epoch': 1} {'type': 'loss', 'content': 0.1409473568201065, 'timestamp': '2025-09-10 02:37:21.197406', 'step': 3518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:21.262791', 'step': 3518, 'epoch': 1} {'type': 'loss', 'content': 0.21510425209999084, 'timestamp': '2025-09-10 02:37:21.265312', 'step': 3519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:21.323696', 'step': 3519, 'epoch': 1} {'type': 'loss', 'content': 0.2058049440383911, 'timestamp': '2025-09-10 02:37:21.330273', 'step': 3520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:21.411978', 'step': 3520, 'epoch': 1} {'type': 'loss', 'content': 0.23571862280368805, 'timestamp': '2025-09-10 02:37:21.413929', 'step': 3521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:21.487056', 'step': 3521, 'epoch': 1} {'type': 'loss', 'content': 0.1356319636106491, 'timestamp': '2025-09-10 02:37:21.490097', 'step': 3522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:21.549147', 'step': 3522, 'epoch': 1} {'type': 'loss', 'content': 0.21359126269817352, 'timestamp': '2025-09-10 02:37:21.551195', 'step': 3523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:21.615549', 'step': 3523, 'epoch': 1} {'type': 'loss', 'content': 0.12748771905899048, 'timestamp': '2025-09-10 02:37:21.622283', 'step': 3524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:21.679077', 'step': 3524, 'epoch': 1} {'type': 'loss', 'content': 0.13420727849006653, 'timestamp': '2025-09-10 02:37:21.688779', 'step': 3525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:21.769381', 'step': 3525, 'epoch': 1} {'type': 'loss', 'content': 0.16346006095409393, 'timestamp': '2025-09-10 02:37:21.771469', 'step': 3526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:21.834566', 'step': 3526, 'epoch': 1} {'type': 'loss', 'content': 0.27264872193336487, 'timestamp': '2025-09-10 02:37:21.836748', 'step': 3527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:21.899835', 'step': 3527, 'epoch': 1} {'type': 'loss', 'content': 0.23383279144763947, 'timestamp': '2025-09-10 02:37:21.908444', 'step': 3528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:21.969136', 'step': 3528, 'epoch': 1} {'type': 'loss', 'content': 0.13725623488426208, 'timestamp': '2025-09-10 02:37:21.978093', 'step': 3529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:22.042226', 'step': 3529, 'epoch': 1} {'type': 'loss', 'content': 0.19525198638439178, 'timestamp': '2025-09-10 02:37:22.044374', 'step': 3530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:22.112431', 'step': 3530, 'epoch': 1} {'type': 'loss', 'content': 0.2347448319196701, 'timestamp': '2025-09-10 02:37:22.114611', 'step': 3531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:22.173501', 'step': 3531, 'epoch': 1} {'type': 'loss', 'content': 0.12386814504861832, 'timestamp': '2025-09-10 02:37:22.180217', 'step': 3532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:22.236790', 'step': 3532, 'epoch': 1} {'type': 'loss', 'content': 0.22711314260959625, 'timestamp': '2025-09-10 02:37:22.238853', 'step': 3533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:22.298646', 'step': 3533, 'epoch': 1} {'type': 'loss', 'content': 0.2008218765258789, 'timestamp': '2025-09-10 02:37:22.300638', 'step': 3534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:22.358620', 'step': 3534, 'epoch': 1} {'type': 'loss', 'content': 0.13635285198688507, 'timestamp': '2025-09-10 02:37:22.360634', 'step': 3535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:22.418604', 'step': 3535, 'epoch': 1} {'type': 'loss', 'content': 0.18093568086624146, 'timestamp': '2025-09-10 02:37:22.425200', 'step': 3536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:22.486261', 'step': 3536, 'epoch': 1} {'type': 'loss', 'content': 0.1640373170375824, 'timestamp': '2025-09-10 02:37:22.488322', 'step': 3537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:22.564178', 'step': 3537, 'epoch': 1} {'type': 'loss', 'content': 0.07758883386850357, 'timestamp': '2025-09-10 02:37:22.566496', 'step': 3538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:22.628553', 'step': 3538, 'epoch': 1} {'type': 'loss', 'content': 0.2544029951095581, 'timestamp': '2025-09-10 02:37:22.630642', 'step': 3539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:22.689430', 'step': 3539, 'epoch': 1} {'type': 'loss', 'content': 0.24741210043430328, 'timestamp': '2025-09-10 02:37:22.696217', 'step': 3540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:22.755734', 'step': 3540, 'epoch': 1} {'type': 'loss', 'content': 0.28205353021621704, 'timestamp': '2025-09-10 02:37:22.757738', 'step': 3541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:22.815507', 'step': 3541, 'epoch': 1} {'type': 'loss', 'content': 0.22587722539901733, 'timestamp': '2025-09-10 02:37:22.817547', 'step': 3542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:22.877592', 'step': 3542, 'epoch': 1} {'type': 'loss', 'content': 0.18244542181491852, 'timestamp': '2025-09-10 02:37:22.879645', 'step': 3543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:22.940375', 'step': 3543, 'epoch': 1} {'type': 'loss', 'content': 0.12637197971343994, 'timestamp': '2025-09-10 02:37:22.947159', 'step': 3544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:23.013471', 'step': 3544, 'epoch': 1} {'type': 'loss', 'content': 0.200617715716362, 'timestamp': '2025-09-10 02:37:23.015723', 'step': 3545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:23.077752', 'step': 3545, 'epoch': 1} {'type': 'loss', 'content': 0.23361854255199432, 'timestamp': '2025-09-10 02:37:23.080090', 'step': 3546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:23.141912', 'step': 3546, 'epoch': 1} {'type': 'loss', 'content': 0.15995904803276062, 'timestamp': '2025-09-10 02:37:23.144246', 'step': 3547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:23.206014', 'step': 3547, 'epoch': 1} {'type': 'loss', 'content': 0.10229881852865219, 'timestamp': '2025-09-10 02:37:23.212953', 'step': 3548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:23.274734', 'step': 3548, 'epoch': 1} {'type': 'loss', 'content': 0.12958668172359467, 'timestamp': '2025-09-10 02:37:23.276749', 'step': 3549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:23.335477', 'step': 3549, 'epoch': 1} {'type': 'loss', 'content': 0.2134760469198227, 'timestamp': '2025-09-10 02:37:23.339016', 'step': 3550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:23.410836', 'step': 3550, 'epoch': 1} {'type': 'loss', 'content': 0.12059253454208374, 'timestamp': '2025-09-10 02:37:23.412831', 'step': 3551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:23.475885', 'step': 3551, 'epoch': 1} {'type': 'loss', 'content': 0.11946631968021393, 'timestamp': '2025-09-10 02:37:23.482974', 'step': 3552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:23.540773', 'step': 3552, 'epoch': 1} {'type': 'loss', 'content': 0.123488649725914, 'timestamp': '2025-09-10 02:37:23.543434', 'step': 3553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:23.602191', 'step': 3553, 'epoch': 1} {'type': 'loss', 'content': 0.10411077737808228, 'timestamp': '2025-09-10 02:37:23.604184', 'step': 3554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:23.661063', 'step': 3554, 'epoch': 1} {'type': 'loss', 'content': 0.16683685779571533, 'timestamp': '2025-09-10 02:37:23.662999', 'step': 3555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:23.720030', 'step': 3555, 'epoch': 1} {'type': 'loss', 'content': 0.18309298157691956, 'timestamp': '2025-09-10 02:37:23.726691', 'step': 3556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:23.779759', 'step': 3556, 'epoch': 1} {'type': 'loss', 'content': 0.1602877825498581, 'timestamp': '2025-09-10 02:37:23.781650', 'step': 3557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:23.836059', 'step': 3557, 'epoch': 1} {'type': 'loss', 'content': 0.16128480434417725, 'timestamp': '2025-09-10 02:37:23.837990', 'step': 3558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:23.908416', 'step': 3558, 'epoch': 1} {'type': 'loss', 'content': 0.267699658870697, 'timestamp': '2025-09-10 02:37:23.910493', 'step': 3559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:23.964696', 'step': 3559, 'epoch': 1} {'type': 'loss', 'content': 0.09357534348964691, 'timestamp': '2025-09-10 02:37:23.970788', 'step': 3560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:24.030371', 'step': 3560, 'epoch': 1} {'type': 'loss', 'content': 0.2371985912322998, 'timestamp': '2025-09-10 02:37:24.032359', 'step': 3561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:24.086304', 'step': 3561, 'epoch': 1} {'type': 'loss', 'content': 0.18673214316368103, 'timestamp': '2025-09-10 02:37:24.088290', 'step': 3562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:24.143525', 'step': 3562, 'epoch': 1} {'type': 'loss', 'content': 0.13313283026218414, 'timestamp': '2025-09-10 02:37:24.145529', 'step': 3563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:24.199388', 'step': 3563, 'epoch': 1} {'type': 'loss', 'content': 0.18778589367866516, 'timestamp': '2025-09-10 02:37:24.205387', 'step': 3564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:24.259445', 'step': 3564, 'epoch': 1} {'type': 'loss', 'content': 0.14068914949893951, 'timestamp': '2025-09-10 02:37:24.261486', 'step': 3565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:24.315072', 'step': 3565, 'epoch': 1} {'type': 'loss', 'content': 0.25138676166534424, 'timestamp': '2025-09-10 02:37:24.317068', 'step': 3566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:24.370871', 'step': 3566, 'epoch': 1} {'type': 'loss', 'content': 0.09659439325332642, 'timestamp': '2025-09-10 02:37:24.372802', 'step': 3567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:24.427543', 'step': 3567, 'epoch': 1} {'type': 'loss', 'content': 0.1278744339942932, 'timestamp': '2025-09-10 02:37:24.433329', 'step': 3568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:24.487186', 'step': 3568, 'epoch': 1} {'type': 'loss', 'content': 0.2159009426832199, 'timestamp': '2025-09-10 02:37:24.489127', 'step': 3569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:24.542191', 'step': 3569, 'epoch': 1} {'type': 'loss', 'content': 0.19351112842559814, 'timestamp': '2025-09-10 02:37:24.544109', 'step': 3570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:24.597851', 'step': 3570, 'epoch': 1} {'type': 'loss', 'content': 0.21349793672561646, 'timestamp': '2025-09-10 02:37:24.599926', 'step': 3571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:24.654508', 'step': 3571, 'epoch': 1} {'type': 'loss', 'content': 0.1706327348947525, 'timestamp': '2025-09-10 02:37:24.660484', 'step': 3572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:24.713796', 'step': 3572, 'epoch': 1} {'type': 'loss', 'content': 0.16264206171035767, 'timestamp': '2025-09-10 02:37:24.715934', 'step': 3573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:24.769988', 'step': 3573, 'epoch': 1} {'type': 'loss', 'content': 0.22562910616397858, 'timestamp': '2025-09-10 02:37:24.772122', 'step': 3574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:24.825648', 'step': 3574, 'epoch': 1} {'type': 'loss', 'content': 0.1689772605895996, 'timestamp': '2025-09-10 02:37:24.827756', 'step': 3575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:24.881574', 'step': 3575, 'epoch': 1} {'type': 'loss', 'content': 0.2159949541091919, 'timestamp': '2025-09-10 02:37:24.887353', 'step': 3576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:24.939483', 'step': 3576, 'epoch': 1} {'type': 'loss', 'content': 0.2378787398338318, 'timestamp': '2025-09-10 02:37:24.941430', 'step': 3577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:25.000743', 'step': 3577, 'epoch': 1} {'type': 'loss', 'content': 0.16845589876174927, 'timestamp': '2025-09-10 02:37:25.002701', 'step': 3578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:25.055957', 'step': 3578, 'epoch': 1} {'type': 'loss', 'content': 0.08397601544857025, 'timestamp': '2025-09-10 02:37:25.057900', 'step': 3579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:25.111823', 'step': 3579, 'epoch': 1} {'type': 'loss', 'content': 0.09927839785814285, 'timestamp': '2025-09-10 02:37:25.117837', 'step': 3580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:25.171738', 'step': 3580, 'epoch': 1} {'type': 'loss', 'content': 0.11870652437210083, 'timestamp': '2025-09-10 02:37:25.173918', 'step': 3581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:25.227317', 'step': 3581, 'epoch': 1} {'type': 'loss', 'content': 0.1413891762495041, 'timestamp': '2025-09-10 02:37:25.229291', 'step': 3582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:25.282469', 'step': 3582, 'epoch': 1} {'type': 'loss', 'content': 0.17234812676906586, 'timestamp': '2025-09-10 02:37:25.284608', 'step': 3583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:25.339780', 'step': 3583, 'epoch': 1} {'type': 'loss', 'content': 0.12957017123699188, 'timestamp': '2025-09-10 02:37:25.345766', 'step': 3584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:25.399255', 'step': 3584, 'epoch': 1} {'type': 'loss', 'content': 0.20981955528259277, 'timestamp': '2025-09-10 02:37:25.401220', 'step': 3585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:25.456245', 'step': 3585, 'epoch': 1} {'type': 'loss', 'content': 0.28276026248931885, 'timestamp': '2025-09-10 02:37:25.458258', 'step': 3586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:25.513509', 'step': 3586, 'epoch': 1} {'type': 'loss', 'content': 0.13945803046226501, 'timestamp': '2025-09-10 02:37:25.516671', 'step': 3587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:25.572437', 'step': 3587, 'epoch': 1} {'type': 'loss', 'content': 0.13794538378715515, 'timestamp': '2025-09-10 02:37:25.578637', 'step': 3588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:25.634561', 'step': 3588, 'epoch': 1} {'type': 'loss', 'content': 0.16455525159835815, 'timestamp': '2025-09-10 02:37:25.636709', 'step': 3589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:25.692238', 'step': 3589, 'epoch': 1} {'type': 'loss', 'content': 0.20983253419399261, 'timestamp': '2025-09-10 02:37:25.694218', 'step': 3590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:25.750832', 'step': 3590, 'epoch': 1} {'type': 'loss', 'content': 0.11247628182172775, 'timestamp': '2025-09-10 02:37:25.752810', 'step': 3591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:25.807978', 'step': 3591, 'epoch': 1} {'type': 'loss', 'content': 0.1447926014661789, 'timestamp': '2025-09-10 02:37:25.814056', 'step': 3592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:25.870761', 'step': 3592, 'epoch': 1} {'type': 'loss', 'content': 0.15775588154792786, 'timestamp': '2025-09-10 02:37:25.872709', 'step': 3593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:25.927104', 'step': 3593, 'epoch': 1} {'type': 'loss', 'content': 0.19222503900527954, 'timestamp': '2025-09-10 02:37:25.928721', 'step': 3594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:25.983513', 'step': 3594, 'epoch': 1} {'type': 'loss', 'content': 0.16342642903327942, 'timestamp': '2025-09-10 02:37:25.985496', 'step': 3595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:26.042645', 'step': 3595, 'epoch': 1} {'type': 'loss', 'content': 0.1231517344713211, 'timestamp': '2025-09-10 02:37:26.048913', 'step': 3596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:26.109998', 'step': 3596, 'epoch': 1} {'type': 'loss', 'content': 0.22307156026363373, 'timestamp': '2025-09-10 02:37:26.112122', 'step': 3597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:26.167378', 'step': 3597, 'epoch': 1} {'type': 'loss', 'content': 0.14181803166866302, 'timestamp': '2025-09-10 02:37:26.169465', 'step': 3598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:26.224239', 'step': 3598, 'epoch': 1} {'type': 'loss', 'content': 0.14368173480033875, 'timestamp': '2025-09-10 02:37:26.226453', 'step': 3599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:26.281555', 'step': 3599, 'epoch': 1} {'type': 'loss', 'content': 0.20771850645542145, 'timestamp': '2025-09-10 02:37:26.287771', 'step': 3600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:26.342312', 'step': 3600, 'epoch': 1} {'type': 'loss', 'content': 0.1899062991142273, 'timestamp': '2025-09-10 02:37:26.344483', 'step': 3601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:26.400588', 'step': 3601, 'epoch': 1} {'type': 'loss', 'content': 0.12828868627548218, 'timestamp': '2025-09-10 02:37:26.402966', 'step': 3602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:26.460275', 'step': 3602, 'epoch': 1} {'type': 'loss', 'content': 0.12004793435335159, 'timestamp': '2025-09-10 02:37:26.462586', 'step': 3603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:26.518694', 'step': 3603, 'epoch': 1} {'type': 'loss', 'content': 0.1723576784133911, 'timestamp': '2025-09-10 02:37:26.525173', 'step': 3604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:26.579872', 'step': 3604, 'epoch': 1} {'type': 'loss', 'content': 0.10242441296577454, 'timestamp': '2025-09-10 02:37:26.582110', 'step': 3605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:26.637036', 'step': 3605, 'epoch': 1} {'type': 'loss', 'content': 0.171005517244339, 'timestamp': '2025-09-10 02:37:26.639028', 'step': 3606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:26.694365', 'step': 3606, 'epoch': 1} {'type': 'loss', 'content': 0.25776052474975586, 'timestamp': '2025-09-10 02:37:26.696547', 'step': 3607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:26.751516', 'step': 3607, 'epoch': 1} {'type': 'loss', 'content': 0.254489004611969, 'timestamp': '2025-09-10 02:37:26.757961', 'step': 3608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:26.813207', 'step': 3608, 'epoch': 1} {'type': 'loss', 'content': 0.1733292192220688, 'timestamp': '2025-09-10 02:37:26.815520', 'step': 3609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:26.870208', 'step': 3609, 'epoch': 1} {'type': 'loss', 'content': 0.138983353972435, 'timestamp': '2025-09-10 02:37:26.872308', 'step': 3610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:26.927613', 'step': 3610, 'epoch': 1} {'type': 'loss', 'content': 0.23333200812339783, 'timestamp': '2025-09-10 02:37:26.929724', 'step': 3611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:26.983386', 'step': 3611, 'epoch': 1} {'type': 'loss', 'content': 0.2443358302116394, 'timestamp': '2025-09-10 02:37:26.989589', 'step': 3612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:27.042828', 'step': 3612, 'epoch': 1} {'type': 'loss', 'content': 0.13836345076560974, 'timestamp': '2025-09-10 02:37:27.044937', 'step': 3613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:27.099877', 'step': 3613, 'epoch': 1} {'type': 'loss', 'content': 0.2163199782371521, 'timestamp': '2025-09-10 02:37:27.102091', 'step': 3614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:27.155347', 'step': 3614, 'epoch': 1} {'type': 'loss', 'content': 0.08689772337675095, 'timestamp': '2025-09-10 02:37:27.157510', 'step': 3615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:27.211096', 'step': 3615, 'epoch': 1} {'type': 'loss', 'content': 0.19674353301525116, 'timestamp': '2025-09-10 02:37:27.217024', 'step': 3616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:27.270207', 'step': 3616, 'epoch': 1} {'type': 'loss', 'content': 0.12459084391593933, 'timestamp': '2025-09-10 02:37:27.272541', 'step': 3617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:27.327169', 'step': 3617, 'epoch': 1} {'type': 'loss', 'content': 0.1740712821483612, 'timestamp': '2025-09-10 02:37:27.329514', 'step': 3618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:27.384999', 'step': 3618, 'epoch': 1} {'type': 'loss', 'content': 0.18552343547344208, 'timestamp': '2025-09-10 02:37:27.387116', 'step': 3619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:27.440465', 'step': 3619, 'epoch': 1} {'type': 'loss', 'content': 0.21662355959415436, 'timestamp': '2025-09-10 02:37:27.446568', 'step': 3620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:27.499891', 'step': 3620, 'epoch': 1} {'type': 'loss', 'content': 0.17568157613277435, 'timestamp': '2025-09-10 02:37:27.501918', 'step': 3621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:27.557318', 'step': 3621, 'epoch': 1} {'type': 'loss', 'content': 0.13835440576076508, 'timestamp': '2025-09-10 02:37:27.559409', 'step': 3622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:27.612651', 'step': 3622, 'epoch': 1} {'type': 'loss', 'content': 0.13792377710342407, 'timestamp': '2025-09-10 02:37:27.614818', 'step': 3623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:27.670825', 'step': 3623, 'epoch': 1} {'type': 'loss', 'content': 0.2708073854446411, 'timestamp': '2025-09-10 02:37:27.676779', 'step': 3624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:27.729909', 'step': 3624, 'epoch': 1} {'type': 'loss', 'content': 0.20098790526390076, 'timestamp': '2025-09-10 02:37:27.732058', 'step': 3625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:27.785991', 'step': 3625, 'epoch': 1} {'type': 'loss', 'content': 0.09146738797426224, 'timestamp': '2025-09-10 02:37:27.788247', 'step': 3626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:27.842728', 'step': 3626, 'epoch': 1} {'type': 'loss', 'content': 0.1407550424337387, 'timestamp': '2025-09-10 02:37:27.844805', 'step': 3627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:27.898610', 'step': 3627, 'epoch': 1} {'type': 'loss', 'content': 0.1465281844139099, 'timestamp': '2025-09-10 02:37:27.904601', 'step': 3628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:27.957894', 'step': 3628, 'epoch': 1} {'type': 'loss', 'content': 0.24381761252880096, 'timestamp': '2025-09-10 02:37:27.960193', 'step': 3629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:28.014407', 'step': 3629, 'epoch': 1} {'type': 'loss', 'content': 0.18586649000644684, 'timestamp': '2025-09-10 02:37:28.016893', 'step': 3630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:28.073202', 'step': 3630, 'epoch': 1} {'type': 'loss', 'content': 0.1843593418598175, 'timestamp': '2025-09-10 02:37:28.075506', 'step': 3631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:28.129614', 'step': 3631, 'epoch': 1} {'type': 'loss', 'content': 0.18906717002391815, 'timestamp': '2025-09-10 02:37:28.135619', 'step': 3632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:28.188514', 'step': 3632, 'epoch': 1} {'type': 'loss', 'content': 0.27015313506126404, 'timestamp': '2025-09-10 02:37:28.190640', 'step': 3633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:28.244340', 'step': 3633, 'epoch': 1} {'type': 'loss', 'content': 0.16345395147800446, 'timestamp': '2025-09-10 02:37:28.246794', 'step': 3634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:28.301201', 'step': 3634, 'epoch': 1} {'type': 'loss', 'content': 0.10574318468570709, 'timestamp': '2025-09-10 02:37:28.303296', 'step': 3635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:28.360994', 'step': 3635, 'epoch': 1} {'type': 'loss', 'content': 0.22825825214385986, 'timestamp': '2025-09-10 02:37:28.367397', 'step': 3636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:28.421956', 'step': 3636, 'epoch': 1} {'type': 'loss', 'content': 0.16662146151065826, 'timestamp': '2025-09-10 02:37:28.424129', 'step': 3637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:28.478637', 'step': 3637, 'epoch': 1} {'type': 'loss', 'content': 0.14466147124767303, 'timestamp': '2025-09-10 02:37:28.480742', 'step': 3638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:28.539244', 'step': 3638, 'epoch': 1} {'type': 'loss', 'content': 0.18208664655685425, 'timestamp': '2025-09-10 02:37:28.541428', 'step': 3639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:28.594693', 'step': 3639, 'epoch': 1} {'type': 'loss', 'content': 0.1970866471529007, 'timestamp': '2025-09-10 02:37:28.600757', 'step': 3640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:28.659031', 'step': 3640, 'epoch': 1} {'type': 'loss', 'content': 0.23769621551036835, 'timestamp': '2025-09-10 02:37:28.661148', 'step': 3641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:28.715597', 'step': 3641, 'epoch': 1} {'type': 'loss', 'content': 0.18990762531757355, 'timestamp': '2025-09-10 02:37:28.717776', 'step': 3642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:28.772698', 'step': 3642, 'epoch': 1} {'type': 'loss', 'content': 0.16092434525489807, 'timestamp': '2025-09-10 02:37:28.775170', 'step': 3643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:28.833832', 'step': 3643, 'epoch': 1} {'type': 'loss', 'content': 0.12661591172218323, 'timestamp': '2025-09-10 02:37:28.840156', 'step': 3644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:28.895624', 'step': 3644, 'epoch': 1} {'type': 'loss', 'content': 0.13156330585479736, 'timestamp': '2025-09-10 02:37:28.897825', 'step': 3645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:28.952468', 'step': 3645, 'epoch': 1} {'type': 'loss', 'content': 0.13812901079654694, 'timestamp': '2025-09-10 02:37:28.954998', 'step': 3646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:29.009715', 'step': 3646, 'epoch': 1} {'type': 'loss', 'content': 0.1506531536579132, 'timestamp': '2025-09-10 02:37:29.012087', 'step': 3647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:29.069138', 'step': 3647, 'epoch': 1} {'type': 'loss', 'content': 0.25770825147628784, 'timestamp': '2025-09-10 02:37:29.075996', 'step': 3648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:29.131885', 'step': 3648, 'epoch': 1} {'type': 'loss', 'content': 0.2008126974105835, 'timestamp': '2025-09-10 02:37:29.134756', 'step': 3649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:29.190932', 'step': 3649, 'epoch': 1} {'type': 'loss', 'content': 0.2889982759952545, 'timestamp': '2025-09-10 02:37:29.193096', 'step': 3650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:29.248407', 'step': 3650, 'epoch': 1} {'type': 'loss', 'content': 0.15587012469768524, 'timestamp': '2025-09-10 02:37:29.250604', 'step': 3651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:37:29.324385', 'step': 3651, 'epoch': 1} {'type': 'loss', 'content': 0.11672864854335785, 'timestamp': '2025-09-10 02:37:29.331948', 'step': 3652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:29.386860', 'step': 3652, 'epoch': 1} {'type': 'loss', 'content': 0.13618488609790802, 'timestamp': '2025-09-10 02:37:29.389074', 'step': 3653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:29.454071', 'step': 3653, 'epoch': 1} {'type': 'loss', 'content': 0.1371680647134781, 'timestamp': '2025-09-10 02:37:29.456437', 'step': 3654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:29.522535', 'step': 3654, 'epoch': 1} {'type': 'loss', 'content': 0.12304424494504929, 'timestamp': '2025-09-10 02:37:29.526751', 'step': 3655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:29.582723', 'step': 3655, 'epoch': 1} {'type': 'loss', 'content': 0.16257105767726898, 'timestamp': '2025-09-10 02:37:29.589266', 'step': 3656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:29.649964', 'step': 3656, 'epoch': 1} {'type': 'loss', 'content': 0.1915835738182068, 'timestamp': '2025-09-10 02:37:29.652086', 'step': 3657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:29.722101', 'step': 3657, 'epoch': 1} {'type': 'loss', 'content': 0.16853201389312744, 'timestamp': '2025-09-10 02:37:29.724176', 'step': 3658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:29.780869', 'step': 3658, 'epoch': 1} {'type': 'loss', 'content': 0.15590490400791168, 'timestamp': '2025-09-10 02:37:29.783094', 'step': 3659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:29.843165', 'step': 3659, 'epoch': 1} {'type': 'loss', 'content': 0.31554079055786133, 'timestamp': '2025-09-10 02:37:29.849588', 'step': 3660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:29.904306', 'step': 3660, 'epoch': 1} {'type': 'loss', 'content': 0.17798517644405365, 'timestamp': '2025-09-10 02:37:29.906663', 'step': 3661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:29.961576', 'step': 3661, 'epoch': 1} {'type': 'loss', 'content': 0.20676450431346893, 'timestamp': '2025-09-10 02:37:29.963769', 'step': 3662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:30.018234', 'step': 3662, 'epoch': 1} {'type': 'loss', 'content': 0.2703949511051178, 'timestamp': '2025-09-10 02:37:30.020331', 'step': 3663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:30.074377', 'step': 3663, 'epoch': 1} {'type': 'loss', 'content': 0.16429615020751953, 'timestamp': '2025-09-10 02:37:30.080777', 'step': 3664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:30.136012', 'step': 3664, 'epoch': 1} {'type': 'loss', 'content': 0.16017654538154602, 'timestamp': '2025-09-10 02:37:30.138159', 'step': 3665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:30.196834', 'step': 3665, 'epoch': 1} {'type': 'loss', 'content': 0.2087625116109848, 'timestamp': '2025-09-10 02:37:30.200292', 'step': 3666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:30.257610', 'step': 3666, 'epoch': 1} {'type': 'loss', 'content': 0.20189517736434937, 'timestamp': '2025-09-10 02:37:30.259743', 'step': 3667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:30.317413', 'step': 3667, 'epoch': 1} {'type': 'loss', 'content': 0.16153734922409058, 'timestamp': '2025-09-10 02:37:30.334871', 'step': 3668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:30.394744', 'step': 3668, 'epoch': 1} {'type': 'loss', 'content': 0.13532458245754242, 'timestamp': '2025-09-10 02:37:30.396964', 'step': 3669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:30.456438', 'step': 3669, 'epoch': 1} {'type': 'loss', 'content': 0.1639736145734787, 'timestamp': '2025-09-10 02:37:30.458758', 'step': 3670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:30.517008', 'step': 3670, 'epoch': 1} {'type': 'loss', 'content': 0.1674751490354538, 'timestamp': '2025-09-10 02:37:30.519324', 'step': 3671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:30.580253', 'step': 3671, 'epoch': 1} {'type': 'loss', 'content': 0.25958019495010376, 'timestamp': '2025-09-10 02:37:30.587071', 'step': 3672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:30.644483', 'step': 3672, 'epoch': 1} {'type': 'loss', 'content': 0.11710299551486969, 'timestamp': '2025-09-10 02:37:30.646841', 'step': 3673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:30.707989', 'step': 3673, 'epoch': 1} {'type': 'loss', 'content': 0.17451514303684235, 'timestamp': '2025-09-10 02:37:30.710133', 'step': 3674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:30.767827', 'step': 3674, 'epoch': 1} {'type': 'loss', 'content': 0.27769121527671814, 'timestamp': '2025-09-10 02:37:30.770142', 'step': 3675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:30.832135', 'step': 3675, 'epoch': 1} {'type': 'loss', 'content': 0.07978975772857666, 'timestamp': '2025-09-10 02:37:30.840120', 'step': 3676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:30.897241', 'step': 3676, 'epoch': 1} {'type': 'loss', 'content': 0.18537531793117523, 'timestamp': '2025-09-10 02:37:30.899760', 'step': 3677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:30.956333', 'step': 3677, 'epoch': 1} {'type': 'loss', 'content': 0.13610513508319855, 'timestamp': '2025-09-10 02:37:30.962338', 'step': 3678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:31.021964', 'step': 3678, 'epoch': 1} {'type': 'loss', 'content': 0.1448383629322052, 'timestamp': '2025-09-10 02:37:31.024247', 'step': 3679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:31.081069', 'step': 3679, 'epoch': 1} {'type': 'loss', 'content': 0.21609799563884735, 'timestamp': '2025-09-10 02:37:31.087697', 'step': 3680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:31.148878', 'step': 3680, 'epoch': 1} {'type': 'loss', 'content': 0.17585204541683197, 'timestamp': '2025-09-10 02:37:31.151371', 'step': 3681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:31.212575', 'step': 3681, 'epoch': 1} {'type': 'loss', 'content': 0.12167531251907349, 'timestamp': '2025-09-10 02:37:31.215061', 'step': 3682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:31.277172', 'step': 3682, 'epoch': 1} {'type': 'loss', 'content': 0.17312224209308624, 'timestamp': '2025-09-10 02:37:31.281648', 'step': 3683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:31.349921', 'step': 3683, 'epoch': 1} {'type': 'loss', 'content': 0.16060437262058258, 'timestamp': '2025-09-10 02:37:31.357442', 'step': 3684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:31.417092', 'step': 3684, 'epoch': 1} {'type': 'loss', 'content': 0.13596156239509583, 'timestamp': '2025-09-10 02:37:31.419525', 'step': 3685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:31.484530', 'step': 3685, 'epoch': 1} {'type': 'loss', 'content': 0.30653348565101624, 'timestamp': '2025-09-10 02:37:31.488656', 'step': 3686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:31.545052', 'step': 3686, 'epoch': 1} {'type': 'loss', 'content': 0.1895151287317276, 'timestamp': '2025-09-10 02:37:31.547311', 'step': 3687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:31.607586', 'step': 3687, 'epoch': 1} {'type': 'loss', 'content': 0.22837312519550323, 'timestamp': '2025-09-10 02:37:31.613929', 'step': 3688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:31.669497', 'step': 3688, 'epoch': 1} {'type': 'loss', 'content': 0.18489432334899902, 'timestamp': '2025-09-10 02:37:31.671498', 'step': 3689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:31.725749', 'step': 3689, 'epoch': 1} {'type': 'loss', 'content': 0.23590070009231567, 'timestamp': '2025-09-10 02:37:31.728102', 'step': 3690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:31.783766', 'step': 3690, 'epoch': 1} {'type': 'loss', 'content': 0.1763356626033783, 'timestamp': '2025-09-10 02:37:31.785911', 'step': 3691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:31.846754', 'step': 3691, 'epoch': 1} {'type': 'loss', 'content': 0.13977180421352386, 'timestamp': '2025-09-10 02:37:31.857370', 'step': 3692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:31.911449', 'step': 3692, 'epoch': 1} {'type': 'loss', 'content': 0.1548057496547699, 'timestamp': '2025-09-10 02:37:31.913591', 'step': 3693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:31.974621', 'step': 3693, 'epoch': 1} {'type': 'loss', 'content': 0.25165224075317383, 'timestamp': '2025-09-10 02:37:31.976693', 'step': 3694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:32.030404', 'step': 3694, 'epoch': 1} {'type': 'loss', 'content': 0.2515445947647095, 'timestamp': '2025-09-10 02:37:32.032590', 'step': 3695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:32.086613', 'step': 3695, 'epoch': 1} {'type': 'loss', 'content': 0.11992724984884262, 'timestamp': '2025-09-10 02:37:32.104190', 'step': 3696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:32.160609', 'step': 3696, 'epoch': 1} {'type': 'loss', 'content': 0.08938998728990555, 'timestamp': '2025-09-10 02:37:32.162804', 'step': 3697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:32.218685', 'step': 3697, 'epoch': 1} {'type': 'loss', 'content': 0.21412619948387146, 'timestamp': '2025-09-10 02:37:32.220918', 'step': 3698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:32.281059', 'step': 3698, 'epoch': 1} {'type': 'loss', 'content': 0.1257801204919815, 'timestamp': '2025-09-10 02:37:32.283193', 'step': 3699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:32.337496', 'step': 3699, 'epoch': 1} {'type': 'loss', 'content': 0.08312193304300308, 'timestamp': '2025-09-10 02:37:32.343590', 'step': 3700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:32.396847', 'step': 3700, 'epoch': 1} {'type': 'loss', 'content': 0.20622482895851135, 'timestamp': '2025-09-10 02:37:32.399083', 'step': 3701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:32.452746', 'step': 3701, 'epoch': 1} {'type': 'loss', 'content': 0.1629788875579834, 'timestamp': '2025-09-10 02:37:32.454936', 'step': 3702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:32.515664', 'step': 3702, 'epoch': 1} {'type': 'loss', 'content': 0.2028183490037918, 'timestamp': '2025-09-10 02:37:32.521357', 'step': 3703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:32.580102', 'step': 3703, 'epoch': 1} {'type': 'loss', 'content': 0.24786685407161713, 'timestamp': '2025-09-10 02:37:32.591201', 'step': 3704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:32.651428', 'step': 3704, 'epoch': 1} {'type': 'loss', 'content': 0.1516188681125641, 'timestamp': '2025-09-10 02:37:32.653697', 'step': 3705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:32.707783', 'step': 3705, 'epoch': 1} {'type': 'loss', 'content': 0.1357063353061676, 'timestamp': '2025-09-10 02:37:32.709931', 'step': 3706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:32.765882', 'step': 3706, 'epoch': 1} {'type': 'loss', 'content': 0.20624959468841553, 'timestamp': '2025-09-10 02:37:32.768184', 'step': 3707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:32.821762', 'step': 3707, 'epoch': 1} {'type': 'loss', 'content': 0.20599229633808136, 'timestamp': '2025-09-10 02:37:32.830202', 'step': 3708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:32.883063', 'step': 3708, 'epoch': 1} {'type': 'loss', 'content': 0.15980112552642822, 'timestamp': '2025-09-10 02:37:32.886963', 'step': 3709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:32.942772', 'step': 3709, 'epoch': 1} {'type': 'loss', 'content': 0.13946065306663513, 'timestamp': '2025-09-10 02:37:32.944895', 'step': 3710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:32.998409', 'step': 3710, 'epoch': 1} {'type': 'loss', 'content': 0.23611752688884735, 'timestamp': '2025-09-10 02:37:33.000541', 'step': 3711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:33.053956', 'step': 3711, 'epoch': 1} {'type': 'loss', 'content': 0.13981951773166656, 'timestamp': '2025-09-10 02:37:33.059771', 'step': 3712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:33.114818', 'step': 3712, 'epoch': 1} {'type': 'loss', 'content': 0.24989460408687592, 'timestamp': '2025-09-10 02:37:33.119624', 'step': 3713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:33.174501', 'step': 3713, 'epoch': 1} {'type': 'loss', 'content': 0.16158844530582428, 'timestamp': '2025-09-10 02:37:33.176690', 'step': 3714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:33.231576', 'step': 3714, 'epoch': 1} {'type': 'loss', 'content': 0.2312552034854889, 'timestamp': '2025-09-10 02:37:33.233738', 'step': 3715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:33.289857', 'step': 3715, 'epoch': 1} {'type': 'loss', 'content': 0.2836582064628601, 'timestamp': '2025-09-10 02:37:33.295695', 'step': 3716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:33.349226', 'step': 3716, 'epoch': 1} {'type': 'loss', 'content': 0.13862809538841248, 'timestamp': '2025-09-10 02:37:33.351174', 'step': 3717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:33.406362', 'step': 3717, 'epoch': 1} {'type': 'loss', 'content': 0.1961236447095871, 'timestamp': '2025-09-10 02:37:33.408543', 'step': 3718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:33.461407', 'step': 3718, 'epoch': 1} {'type': 'loss', 'content': 0.17317263782024384, 'timestamp': '2025-09-10 02:37:33.463468', 'step': 3719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:33.516581', 'step': 3719, 'epoch': 1} {'type': 'loss', 'content': 0.14012809097766876, 'timestamp': '2025-09-10 02:37:33.522743', 'step': 3720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:33.576202', 'step': 3720, 'epoch': 1} {'type': 'loss', 'content': 0.17883461713790894, 'timestamp': '2025-09-10 02:37:33.578466', 'step': 3721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:33.632071', 'step': 3721, 'epoch': 1} {'type': 'loss', 'content': 0.11510764807462692, 'timestamp': '2025-09-10 02:37:33.634209', 'step': 3722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:33.687743', 'step': 3722, 'epoch': 1} {'type': 'loss', 'content': 0.19879597425460815, 'timestamp': '2025-09-10 02:37:33.689964', 'step': 3723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:33.746306', 'step': 3723, 'epoch': 1} {'type': 'loss', 'content': 0.20201048254966736, 'timestamp': '2025-09-10 02:37:33.752463', 'step': 3724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:33.806088', 'step': 3724, 'epoch': 1} {'type': 'loss', 'content': 0.1439397782087326, 'timestamp': '2025-09-10 02:37:33.808096', 'step': 3725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:33.860753', 'step': 3725, 'epoch': 1} {'type': 'loss', 'content': 0.17948313057422638, 'timestamp': '2025-09-10 02:37:33.863000', 'step': 3726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:33.916351', 'step': 3726, 'epoch': 1} {'type': 'loss', 'content': 0.1648872196674347, 'timestamp': '2025-09-10 02:37:33.918576', 'step': 3727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:33.971931', 'step': 3727, 'epoch': 1} {'type': 'loss', 'content': 0.15144045650959015, 'timestamp': '2025-09-10 02:37:33.978005', 'step': 3728, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:37:47.050848', 'step': 3728, 'epoch': 1} {'type': 'pplx', 'content': 11056.900515569194, 'timestamp': '2025-09-10 02:37:47.053805', 'step': 3728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:47.111035', 'step': 3728, 'epoch': 1} {'type': 'loss', 'content': 0.12658539414405823, 'timestamp': '2025-09-10 02:37:47.117518', 'step': 3729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:47.171291', 'step': 3729, 'epoch': 1} {'type': 'loss', 'content': 0.18900686502456665, 'timestamp': '2025-09-10 02:37:47.174202', 'step': 3730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:47.229175', 'step': 3730, 'epoch': 1} {'type': 'loss', 'content': 0.16956831514835358, 'timestamp': '2025-09-10 02:37:47.231323', 'step': 3731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:47.289079', 'step': 3731, 'epoch': 1} {'type': 'loss', 'content': 0.22720663249492645, 'timestamp': '2025-09-10 02:37:47.294801', 'step': 3732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:47.348305', 'step': 3732, 'epoch': 1} {'type': 'loss', 'content': 0.18238890171051025, 'timestamp': '2025-09-10 02:37:47.350292', 'step': 3733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:47.405361', 'step': 3733, 'epoch': 1} {'type': 'loss', 'content': 0.22067303955554962, 'timestamp': '2025-09-10 02:37:47.407285', 'step': 3734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:47.467017', 'step': 3734, 'epoch': 1} {'type': 'loss', 'content': 0.1937478631734848, 'timestamp': '2025-09-10 02:37:47.468753', 'step': 3735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:47.524034', 'step': 3735, 'epoch': 1} {'type': 'loss', 'content': 0.22472341358661652, 'timestamp': '2025-09-10 02:37:47.530114', 'step': 3736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:47.583710', 'step': 3736, 'epoch': 1} {'type': 'loss', 'content': 0.1613408923149109, 'timestamp': '2025-09-10 02:37:47.585794', 'step': 3737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:47.644158', 'step': 3737, 'epoch': 1} {'type': 'loss', 'content': 0.0880202129483223, 'timestamp': '2025-09-10 02:37:47.646125', 'step': 3738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:47.700925', 'step': 3738, 'epoch': 1} {'type': 'loss', 'content': 0.16880683600902557, 'timestamp': '2025-09-10 02:37:47.707102', 'step': 3739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:47.768254', 'step': 3739, 'epoch': 1} {'type': 'loss', 'content': 0.20057761669158936, 'timestamp': '2025-09-10 02:37:47.779002', 'step': 3740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:47.837072', 'step': 3740, 'epoch': 1} {'type': 'loss', 'content': 0.21501904726028442, 'timestamp': '2025-09-10 02:37:47.838927', 'step': 3741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:47.892956', 'step': 3741, 'epoch': 1} {'type': 'loss', 'content': 0.11672716587781906, 'timestamp': '2025-09-10 02:37:47.895026', 'step': 3742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:47.949473', 'step': 3742, 'epoch': 1} {'type': 'loss', 'content': 0.16802455484867096, 'timestamp': '2025-09-10 02:37:47.951380', 'step': 3743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:48.004716', 'step': 3743, 'epoch': 1} {'type': 'loss', 'content': 0.2117798924446106, 'timestamp': '2025-09-10 02:37:48.010610', 'step': 3744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:48.063795', 'step': 3744, 'epoch': 1} {'type': 'loss', 'content': 0.15842919051647186, 'timestamp': '2025-09-10 02:37:48.065805', 'step': 3745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:48.119556', 'step': 3745, 'epoch': 1} {'type': 'loss', 'content': 0.13067616522312164, 'timestamp': '2025-09-10 02:37:48.121251', 'step': 3746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:48.174351', 'step': 3746, 'epoch': 1} {'type': 'loss', 'content': 0.14040908217430115, 'timestamp': '2025-09-10 02:37:48.176332', 'step': 3747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:48.230155', 'step': 3747, 'epoch': 1} {'type': 'loss', 'content': 0.15572980046272278, 'timestamp': '2025-09-10 02:37:48.235730', 'step': 3748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:48.288489', 'step': 3748, 'epoch': 1} {'type': 'loss', 'content': 0.2351149320602417, 'timestamp': '2025-09-10 02:37:48.290184', 'step': 3749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:48.344897', 'step': 3749, 'epoch': 1} {'type': 'loss', 'content': 0.1833731234073639, 'timestamp': '2025-09-10 02:37:48.346918', 'step': 3750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:48.401066', 'step': 3750, 'epoch': 1} {'type': 'loss', 'content': 0.18577425181865692, 'timestamp': '2025-09-10 02:37:48.403440', 'step': 3751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:48.459879', 'step': 3751, 'epoch': 1} {'type': 'loss', 'content': 0.08015619963407516, 'timestamp': '2025-09-10 02:37:48.465937', 'step': 3752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:48.522032', 'step': 3752, 'epoch': 1} {'type': 'loss', 'content': 0.2050081044435501, 'timestamp': '2025-09-10 02:37:48.523864', 'step': 3753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:48.579400', 'step': 3753, 'epoch': 1} {'type': 'loss', 'content': 0.14221636950969696, 'timestamp': '2025-09-10 02:37:48.581058', 'step': 3754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:48.634822', 'step': 3754, 'epoch': 1} {'type': 'loss', 'content': 0.13327638804912567, 'timestamp': '2025-09-10 02:37:48.636746', 'step': 3755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:48.691170', 'step': 3755, 'epoch': 1} {'type': 'loss', 'content': 0.2024718075990677, 'timestamp': '2025-09-10 02:37:48.697121', 'step': 3756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:48.749644', 'step': 3756, 'epoch': 1} {'type': 'loss', 'content': 0.15534844994544983, 'timestamp': '2025-09-10 02:37:48.751453', 'step': 3757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:48.805115', 'step': 3757, 'epoch': 1} {'type': 'loss', 'content': 0.2504846453666687, 'timestamp': '2025-09-10 02:37:48.807072', 'step': 3758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:48.861335', 'step': 3758, 'epoch': 1} {'type': 'loss', 'content': 0.11139972507953644, 'timestamp': '2025-09-10 02:37:48.863303', 'step': 3759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:48.922397', 'step': 3759, 'epoch': 1} {'type': 'loss', 'content': 0.08607799559831619, 'timestamp': '2025-09-10 02:37:48.927941', 'step': 3760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:48.981365', 'step': 3760, 'epoch': 1} {'type': 'loss', 'content': 0.14152881503105164, 'timestamp': '2025-09-10 02:37:48.983564', 'step': 3761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:49.037908', 'step': 3761, 'epoch': 1} {'type': 'loss', 'content': 0.12520389258861542, 'timestamp': '2025-09-10 02:37:49.039864', 'step': 3762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:49.094706', 'step': 3762, 'epoch': 1} {'type': 'loss', 'content': 0.14389200508594513, 'timestamp': '2025-09-10 02:37:49.096598', 'step': 3763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:49.150535', 'step': 3763, 'epoch': 1} {'type': 'loss', 'content': 0.1498996764421463, 'timestamp': '2025-09-10 02:37:49.156393', 'step': 3764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:49.210485', 'step': 3764, 'epoch': 1} {'type': 'loss', 'content': 0.10803177952766418, 'timestamp': '2025-09-10 02:37:49.212526', 'step': 3765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:49.265765', 'step': 3765, 'epoch': 1} {'type': 'loss', 'content': 0.07378753274679184, 'timestamp': '2025-09-10 02:37:49.267836', 'step': 3766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:49.321557', 'step': 3766, 'epoch': 1} {'type': 'loss', 'content': 0.13463915884494781, 'timestamp': '2025-09-10 02:37:49.323604', 'step': 3767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:49.377578', 'step': 3767, 'epoch': 1} {'type': 'loss', 'content': 0.19429054856300354, 'timestamp': '2025-09-10 02:37:49.383425', 'step': 3768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:49.437708', 'step': 3768, 'epoch': 1} {'type': 'loss', 'content': 0.18943029642105103, 'timestamp': '2025-09-10 02:37:49.439792', 'step': 3769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:49.493638', 'step': 3769, 'epoch': 1} {'type': 'loss', 'content': 0.13351133465766907, 'timestamp': '2025-09-10 02:37:49.495827', 'step': 3770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:49.550307', 'step': 3770, 'epoch': 1} {'type': 'loss', 'content': 0.13665543496608734, 'timestamp': '2025-09-10 02:37:49.552344', 'step': 3771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:49.606502', 'step': 3771, 'epoch': 1} {'type': 'loss', 'content': 0.11650712788105011, 'timestamp': '2025-09-10 02:37:49.612021', 'step': 3772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:49.665267', 'step': 3772, 'epoch': 1} {'type': 'loss', 'content': 0.17134062945842743, 'timestamp': '2025-09-10 02:37:49.667352', 'step': 3773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:49.720400', 'step': 3773, 'epoch': 1} {'type': 'loss', 'content': 0.2198037952184677, 'timestamp': '2025-09-10 02:37:49.722304', 'step': 3774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:49.775670', 'step': 3774, 'epoch': 1} {'type': 'loss', 'content': 0.07057458907365799, 'timestamp': '2025-09-10 02:37:49.777495', 'step': 3775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:49.830805', 'step': 3775, 'epoch': 1} {'type': 'loss', 'content': 0.1599961817264557, 'timestamp': '2025-09-10 02:37:49.836233', 'step': 3776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:49.889061', 'step': 3776, 'epoch': 1} {'type': 'loss', 'content': 0.19422677159309387, 'timestamp': '2025-09-10 02:37:49.890896', 'step': 3777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:49.944923', 'step': 3777, 'epoch': 1} {'type': 'loss', 'content': 0.203141450881958, 'timestamp': '2025-09-10 02:37:49.946691', 'step': 3778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:50.001102', 'step': 3778, 'epoch': 1} {'type': 'loss', 'content': 0.1471656709909439, 'timestamp': '2025-09-10 02:37:50.003135', 'step': 3779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:50.056444', 'step': 3779, 'epoch': 1} {'type': 'loss', 'content': 0.1392608880996704, 'timestamp': '2025-09-10 02:37:50.062124', 'step': 3780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:50.115300', 'step': 3780, 'epoch': 1} {'type': 'loss', 'content': 0.14642459154129028, 'timestamp': '2025-09-10 02:37:50.117199', 'step': 3781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:50.170516', 'step': 3781, 'epoch': 1} {'type': 'loss', 'content': 0.10801109671592712, 'timestamp': '2025-09-10 02:37:50.172609', 'step': 3782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:50.229295', 'step': 3782, 'epoch': 1} {'type': 'loss', 'content': 0.23707661032676697, 'timestamp': '2025-09-10 02:37:50.231522', 'step': 3783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:50.284944', 'step': 3783, 'epoch': 1} {'type': 'loss', 'content': 0.13270771503448486, 'timestamp': '2025-09-10 02:37:50.290684', 'step': 3784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:50.344467', 'step': 3784, 'epoch': 1} {'type': 'loss', 'content': 0.20521730184555054, 'timestamp': '2025-09-10 02:37:50.346642', 'step': 3785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:50.401509', 'step': 3785, 'epoch': 1} {'type': 'loss', 'content': 0.09625083208084106, 'timestamp': '2025-09-10 02:37:50.403606', 'step': 3786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:50.457353', 'step': 3786, 'epoch': 1} {'type': 'loss', 'content': 0.1458502858877182, 'timestamp': '2025-09-10 02:37:50.460377', 'step': 3787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:50.515810', 'step': 3787, 'epoch': 1} {'type': 'loss', 'content': 0.16554705798625946, 'timestamp': '2025-09-10 02:37:50.521652', 'step': 3788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:50.574766', 'step': 3788, 'epoch': 1} {'type': 'loss', 'content': 0.20159728825092316, 'timestamp': '2025-09-10 02:37:50.576915', 'step': 3789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:50.630483', 'step': 3789, 'epoch': 1} {'type': 'loss', 'content': 0.16931019723415375, 'timestamp': '2025-09-10 02:37:50.632638', 'step': 3790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:50.685880', 'step': 3790, 'epoch': 1} {'type': 'loss', 'content': 0.17027100920677185, 'timestamp': '2025-09-10 02:37:50.687918', 'step': 3791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:50.741411', 'step': 3791, 'epoch': 1} {'type': 'loss', 'content': 0.1803475171327591, 'timestamp': '2025-09-10 02:37:50.747312', 'step': 3792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:50.799915', 'step': 3792, 'epoch': 1} {'type': 'loss', 'content': 0.24774318933486938, 'timestamp': '2025-09-10 02:37:50.801862', 'step': 3793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:50.854937', 'step': 3793, 'epoch': 1} {'type': 'loss', 'content': 0.19838926196098328, 'timestamp': '2025-09-10 02:37:50.856925', 'step': 3794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:50.910762', 'step': 3794, 'epoch': 1} {'type': 'loss', 'content': 0.2146538347005844, 'timestamp': '2025-09-10 02:37:50.912987', 'step': 3795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:50.966344', 'step': 3795, 'epoch': 1} {'type': 'loss', 'content': 0.18182092905044556, 'timestamp': '2025-09-10 02:37:50.971932', 'step': 3796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:51.027348', 'step': 3796, 'epoch': 1} {'type': 'loss', 'content': 0.07503645867109299, 'timestamp': '2025-09-10 02:37:51.029613', 'step': 3797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:51.083956', 'step': 3797, 'epoch': 1} {'type': 'loss', 'content': 0.15147680044174194, 'timestamp': '2025-09-10 02:37:51.085878', 'step': 3798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:51.139850', 'step': 3798, 'epoch': 1} {'type': 'loss', 'content': 0.29107898473739624, 'timestamp': '2025-09-10 02:37:51.142127', 'step': 3799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:51.195969', 'step': 3799, 'epoch': 1} {'type': 'loss', 'content': 0.05363553762435913, 'timestamp': '2025-09-10 02:37:51.201826', 'step': 3800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:51.255844', 'step': 3800, 'epoch': 1} {'type': 'loss', 'content': 0.1251024752855301, 'timestamp': '2025-09-10 02:37:51.257775', 'step': 3801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:51.311004', 'step': 3801, 'epoch': 1} {'type': 'loss', 'content': 0.1570071429014206, 'timestamp': '2025-09-10 02:37:51.312940', 'step': 3802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:51.366438', 'step': 3802, 'epoch': 1} {'type': 'loss', 'content': 0.11777377128601074, 'timestamp': '2025-09-10 02:37:51.368609', 'step': 3803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:51.421962', 'step': 3803, 'epoch': 1} {'type': 'loss', 'content': 0.18789860606193542, 'timestamp': '2025-09-10 02:37:51.427733', 'step': 3804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:51.484064', 'step': 3804, 'epoch': 1} {'type': 'loss', 'content': 0.1498408019542694, 'timestamp': '2025-09-10 02:37:51.486219', 'step': 3805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:51.539707', 'step': 3805, 'epoch': 1} {'type': 'loss', 'content': 0.11168559640645981, 'timestamp': '2025-09-10 02:37:51.541895', 'step': 3806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:51.596809', 'step': 3806, 'epoch': 1} {'type': 'loss', 'content': 0.23992013931274414, 'timestamp': '2025-09-10 02:37:51.598954', 'step': 3807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:51.652097', 'step': 3807, 'epoch': 1} {'type': 'loss', 'content': 0.12639524042606354, 'timestamp': '2025-09-10 02:37:51.658099', 'step': 3808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:51.711448', 'step': 3808, 'epoch': 1} {'type': 'loss', 'content': 0.14937947690486908, 'timestamp': '2025-09-10 02:37:51.713641', 'step': 3809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:51.767032', 'step': 3809, 'epoch': 1} {'type': 'loss', 'content': 0.19767165184020996, 'timestamp': '2025-09-10 02:37:51.769219', 'step': 3810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:51.822715', 'step': 3810, 'epoch': 1} {'type': 'loss', 'content': 0.15971671044826508, 'timestamp': '2025-09-10 02:37:51.824887', 'step': 3811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:51.878251', 'step': 3811, 'epoch': 1} {'type': 'loss', 'content': 0.18793168663978577, 'timestamp': '2025-09-10 02:37:51.884236', 'step': 3812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:51.937379', 'step': 3812, 'epoch': 1} {'type': 'loss', 'content': 0.16568900644779205, 'timestamp': '2025-09-10 02:37:51.939512', 'step': 3813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:51.993929', 'step': 3813, 'epoch': 1} {'type': 'loss', 'content': 0.14443339407444, 'timestamp': '2025-09-10 02:37:51.996311', 'step': 3814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:52.050358', 'step': 3814, 'epoch': 1} {'type': 'loss', 'content': 0.11939512938261032, 'timestamp': '2025-09-10 02:37:52.052700', 'step': 3815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:52.107199', 'step': 3815, 'epoch': 1} {'type': 'loss', 'content': 0.2074374556541443, 'timestamp': '2025-09-10 02:37:52.113130', 'step': 3816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:52.166365', 'step': 3816, 'epoch': 1} {'type': 'loss', 'content': 0.1714693158864975, 'timestamp': '2025-09-10 02:37:52.168463', 'step': 3817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:52.222120', 'step': 3817, 'epoch': 1} {'type': 'loss', 'content': 0.10449565947055817, 'timestamp': '2025-09-10 02:37:52.224267', 'step': 3818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:52.277886', 'step': 3818, 'epoch': 1} {'type': 'loss', 'content': 0.16485826671123505, 'timestamp': '2025-09-10 02:37:52.280366', 'step': 3819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:52.333247', 'step': 3819, 'epoch': 1} {'type': 'loss', 'content': 0.14917665719985962, 'timestamp': '2025-09-10 02:37:52.339045', 'step': 3820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:52.393711', 'step': 3820, 'epoch': 1} {'type': 'loss', 'content': 0.12088359892368317, 'timestamp': '2025-09-10 02:37:52.395876', 'step': 3821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:52.455739', 'step': 3821, 'epoch': 1} {'type': 'loss', 'content': 0.1859702616930008, 'timestamp': '2025-09-10 02:37:52.457905', 'step': 3822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:52.512187', 'step': 3822, 'epoch': 1} {'type': 'loss', 'content': 0.0994475707411766, 'timestamp': '2025-09-10 02:37:52.514359', 'step': 3823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:52.567810', 'step': 3823, 'epoch': 1} {'type': 'loss', 'content': 0.2086958885192871, 'timestamp': '2025-09-10 02:37:52.573576', 'step': 3824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:52.627999', 'step': 3824, 'epoch': 1} {'type': 'loss', 'content': 0.12363056093454361, 'timestamp': '2025-09-10 02:37:52.629985', 'step': 3825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:52.685111', 'step': 3825, 'epoch': 1} {'type': 'loss', 'content': 0.20286382734775543, 'timestamp': '2025-09-10 02:37:52.687339', 'step': 3826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:52.741976', 'step': 3826, 'epoch': 1} {'type': 'loss', 'content': 0.1417996883392334, 'timestamp': '2025-09-10 02:37:52.744288', 'step': 3827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:52.800958', 'step': 3827, 'epoch': 1} {'type': 'loss', 'content': 0.20904983580112457, 'timestamp': '2025-09-10 02:37:52.807173', 'step': 3828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:52.860432', 'step': 3828, 'epoch': 1} {'type': 'loss', 'content': 0.16281931102275848, 'timestamp': '2025-09-10 02:37:52.862738', 'step': 3829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:52.917257', 'step': 3829, 'epoch': 1} {'type': 'loss', 'content': 0.1263972371816635, 'timestamp': '2025-09-10 02:37:52.919521', 'step': 3830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:52.973396', 'step': 3830, 'epoch': 1} {'type': 'loss', 'content': 0.15486936271190643, 'timestamp': '2025-09-10 02:37:52.975633', 'step': 3831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:53.030816', 'step': 3831, 'epoch': 1} {'type': 'loss', 'content': 0.19891437888145447, 'timestamp': '2025-09-10 02:37:53.036820', 'step': 3832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:53.089746', 'step': 3832, 'epoch': 1} {'type': 'loss', 'content': 0.24181637167930603, 'timestamp': '2025-09-10 02:37:53.092123', 'step': 3833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:53.145812', 'step': 3833, 'epoch': 1} {'type': 'loss', 'content': 0.21069245040416718, 'timestamp': '2025-09-10 02:37:53.148197', 'step': 3834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:53.207663', 'step': 3834, 'epoch': 1} {'type': 'loss', 'content': 0.24591217935085297, 'timestamp': '2025-09-10 02:37:53.210068', 'step': 3835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:53.264112', 'step': 3835, 'epoch': 1} {'type': 'loss', 'content': 0.16466748714447021, 'timestamp': '2025-09-10 02:37:53.270297', 'step': 3836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:53.323702', 'step': 3836, 'epoch': 1} {'type': 'loss', 'content': 0.19442491233348846, 'timestamp': '2025-09-10 02:37:53.325855', 'step': 3837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:53.379948', 'step': 3837, 'epoch': 1} {'type': 'loss', 'content': 0.12848249077796936, 'timestamp': '2025-09-10 02:37:53.382162', 'step': 3838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:53.438890', 'step': 3838, 'epoch': 1} {'type': 'loss', 'content': 0.16955898702144623, 'timestamp': '2025-09-10 02:37:53.441189', 'step': 3839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:53.496028', 'step': 3839, 'epoch': 1} {'type': 'loss', 'content': 0.1649312973022461, 'timestamp': '2025-09-10 02:37:53.501924', 'step': 3840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:53.556049', 'step': 3840, 'epoch': 1} {'type': 'loss', 'content': 0.17098288238048553, 'timestamp': '2025-09-10 02:37:53.558305', 'step': 3841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:53.612138', 'step': 3841, 'epoch': 1} {'type': 'loss', 'content': 0.22854526340961456, 'timestamp': '2025-09-10 02:37:53.614544', 'step': 3842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:53.668773', 'step': 3842, 'epoch': 1} {'type': 'loss', 'content': 0.20551124215126038, 'timestamp': '2025-09-10 02:37:53.671230', 'step': 3843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:53.726489', 'step': 3843, 'epoch': 1} {'type': 'loss', 'content': 0.2039152979850769, 'timestamp': '2025-09-10 02:37:53.733061', 'step': 3844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:53.788170', 'step': 3844, 'epoch': 1} {'type': 'loss', 'content': 0.2625470459461212, 'timestamp': '2025-09-10 02:37:53.790207', 'step': 3845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:53.845030', 'step': 3845, 'epoch': 1} {'type': 'loss', 'content': 0.1912754476070404, 'timestamp': '2025-09-10 02:37:53.847323', 'step': 3846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:53.903106', 'step': 3846, 'epoch': 1} {'type': 'loss', 'content': 0.27866190671920776, 'timestamp': '2025-09-10 02:37:53.905201', 'step': 3847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:53.962174', 'step': 3847, 'epoch': 1} {'type': 'loss', 'content': 0.2331872284412384, 'timestamp': '2025-09-10 02:37:53.968367', 'step': 3848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:54.023883', 'step': 3848, 'epoch': 1} {'type': 'loss', 'content': 0.2163187861442566, 'timestamp': '2025-09-10 02:37:54.026162', 'step': 3849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:54.081159', 'step': 3849, 'epoch': 1} {'type': 'loss', 'content': 0.1605706661939621, 'timestamp': '2025-09-10 02:37:54.083346', 'step': 3850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:54.138776', 'step': 3850, 'epoch': 1} {'type': 'loss', 'content': 0.1944892555475235, 'timestamp': '2025-09-10 02:37:54.141033', 'step': 3851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:54.197173', 'step': 3851, 'epoch': 1} {'type': 'loss', 'content': 0.2089274525642395, 'timestamp': '2025-09-10 02:37:54.203690', 'step': 3852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:54.259733', 'step': 3852, 'epoch': 1} {'type': 'loss', 'content': 0.20410564541816711, 'timestamp': '2025-09-10 02:37:54.262020', 'step': 3853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:54.319700', 'step': 3853, 'epoch': 1} {'type': 'loss', 'content': 0.1584552526473999, 'timestamp': '2025-09-10 02:37:54.321901', 'step': 3854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:54.378850', 'step': 3854, 'epoch': 1} {'type': 'loss', 'content': 0.1644536405801773, 'timestamp': '2025-09-10 02:37:54.381093', 'step': 3855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:54.439317', 'step': 3855, 'epoch': 1} {'type': 'loss', 'content': 0.17937423288822174, 'timestamp': '2025-09-10 02:37:54.445760', 'step': 3856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:54.503089', 'step': 3856, 'epoch': 1} {'type': 'loss', 'content': 0.1610860824584961, 'timestamp': '2025-09-10 02:37:54.505434', 'step': 3857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:54.560901', 'step': 3857, 'epoch': 1} {'type': 'loss', 'content': 0.1776440590620041, 'timestamp': '2025-09-10 02:37:54.562959', 'step': 3858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:54.616602', 'step': 3858, 'epoch': 1} {'type': 'loss', 'content': 0.12779206037521362, 'timestamp': '2025-09-10 02:37:54.618621', 'step': 3859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:54.672940', 'step': 3859, 'epoch': 1} {'type': 'loss', 'content': 0.10073654353618622, 'timestamp': '2025-09-10 02:37:54.679108', 'step': 3860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:54.733482', 'step': 3860, 'epoch': 1} {'type': 'loss', 'content': 0.2608834207057953, 'timestamp': '2025-09-10 02:37:54.736073', 'step': 3861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:54.793548', 'step': 3861, 'epoch': 1} {'type': 'loss', 'content': 0.158247709274292, 'timestamp': '2025-09-10 02:37:54.795891', 'step': 3862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:54.852701', 'step': 3862, 'epoch': 1} {'type': 'loss', 'content': 0.16238407790660858, 'timestamp': '2025-09-10 02:37:54.854941', 'step': 3863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:54.910263', 'step': 3863, 'epoch': 1} {'type': 'loss', 'content': 0.09212719649076462, 'timestamp': '2025-09-10 02:37:54.916787', 'step': 3864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:54.975503', 'step': 3864, 'epoch': 1} {'type': 'loss', 'content': 0.2028706669807434, 'timestamp': '2025-09-10 02:37:54.977589', 'step': 3865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:55.032350', 'step': 3865, 'epoch': 1} {'type': 'loss', 'content': 0.26182621717453003, 'timestamp': '2025-09-10 02:37:55.034556', 'step': 3866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:55.089797', 'step': 3866, 'epoch': 1} {'type': 'loss', 'content': 0.1720695197582245, 'timestamp': '2025-09-10 02:37:55.092003', 'step': 3867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:55.147582', 'step': 3867, 'epoch': 1} {'type': 'loss', 'content': 0.1066230833530426, 'timestamp': '2025-09-10 02:37:55.153936', 'step': 3868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:55.208795', 'step': 3868, 'epoch': 1} {'type': 'loss', 'content': 0.17413096129894257, 'timestamp': '2025-09-10 02:37:55.210498', 'step': 3869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:55.264953', 'step': 3869, 'epoch': 1} {'type': 'loss', 'content': 0.1831609457731247, 'timestamp': '2025-09-10 02:37:55.277789', 'step': 3870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:55.343080', 'step': 3870, 'epoch': 1} {'type': 'loss', 'content': 0.12078558653593063, 'timestamp': '2025-09-10 02:37:55.345548', 'step': 3871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:55.401062', 'step': 3871, 'epoch': 1} {'type': 'loss', 'content': 0.2632034420967102, 'timestamp': '2025-09-10 02:37:55.412973', 'step': 3872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:55.476653', 'step': 3872, 'epoch': 1} {'type': 'loss', 'content': 0.23385393619537354, 'timestamp': '2025-09-10 02:37:55.479079', 'step': 3873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:55.535535', 'step': 3873, 'epoch': 1} {'type': 'loss', 'content': 0.1408126950263977, 'timestamp': '2025-09-10 02:37:55.537727', 'step': 3874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:55.592581', 'step': 3874, 'epoch': 1} {'type': 'loss', 'content': 0.09553506225347519, 'timestamp': '2025-09-10 02:37:55.594799', 'step': 3875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:55.649304', 'step': 3875, 'epoch': 1} {'type': 'loss', 'content': 0.13597771525382996, 'timestamp': '2025-09-10 02:37:55.655698', 'step': 3876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:55.710872', 'step': 3876, 'epoch': 1} {'type': 'loss', 'content': 0.1317218691110611, 'timestamp': '2025-09-10 02:37:55.712896', 'step': 3877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:55.767799', 'step': 3877, 'epoch': 1} {'type': 'loss', 'content': 0.10255604982376099, 'timestamp': '2025-09-10 02:37:55.769606', 'step': 3878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:55.826384', 'step': 3878, 'epoch': 1} {'type': 'loss', 'content': 0.21021848917007446, 'timestamp': '2025-09-10 02:37:55.828513', 'step': 3879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:55.886703', 'step': 3879, 'epoch': 1} {'type': 'loss', 'content': 0.11452410370111465, 'timestamp': '2025-09-10 02:37:55.893746', 'step': 3880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:55.953257', 'step': 3880, 'epoch': 1} {'type': 'loss', 'content': 0.19525139033794403, 'timestamp': '2025-09-10 02:37:55.955615', 'step': 3881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:56.012907', 'step': 3881, 'epoch': 1} {'type': 'loss', 'content': 0.18219716846942902, 'timestamp': '2025-09-10 02:37:56.015019', 'step': 3882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:56.073001', 'step': 3882, 'epoch': 1} {'type': 'loss', 'content': 0.12070415169000626, 'timestamp': '2025-09-10 02:37:56.075121', 'step': 3883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:56.132207', 'step': 3883, 'epoch': 1} {'type': 'loss', 'content': 0.18376052379608154, 'timestamp': '2025-09-10 02:37:56.138891', 'step': 3884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:56.195209', 'step': 3884, 'epoch': 1} {'type': 'loss', 'content': 0.2303168922662735, 'timestamp': '2025-09-10 02:37:56.197468', 'step': 3885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:56.261233', 'step': 3885, 'epoch': 1} {'type': 'loss', 'content': 0.14681673049926758, 'timestamp': '2025-09-10 02:37:56.263912', 'step': 3886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:56.321585', 'step': 3886, 'epoch': 1} {'type': 'loss', 'content': 0.1464986652135849, 'timestamp': '2025-09-10 02:37:56.324097', 'step': 3887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:56.381235', 'step': 3887, 'epoch': 1} {'type': 'loss', 'content': 0.15616340935230255, 'timestamp': '2025-09-10 02:37:56.388201', 'step': 3888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:56.444678', 'step': 3888, 'epoch': 1} {'type': 'loss', 'content': 0.20354077219963074, 'timestamp': '2025-09-10 02:37:56.447073', 'step': 3889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:56.503484', 'step': 3889, 'epoch': 1} {'type': 'loss', 'content': 0.19950099289417267, 'timestamp': '2025-09-10 02:37:56.505765', 'step': 3890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:56.561862', 'step': 3890, 'epoch': 1} {'type': 'loss', 'content': 0.1686982959508896, 'timestamp': '2025-09-10 02:37:56.564211', 'step': 3891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:56.619128', 'step': 3891, 'epoch': 1} {'type': 'loss', 'content': 0.1886904090642929, 'timestamp': '2025-09-10 02:37:56.625537', 'step': 3892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:56.679628', 'step': 3892, 'epoch': 1} {'type': 'loss', 'content': 0.1460184007883072, 'timestamp': '2025-09-10 02:37:56.681675', 'step': 3893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:56.736117', 'step': 3893, 'epoch': 1} {'type': 'loss', 'content': 0.1693405956029892, 'timestamp': '2025-09-10 02:37:56.738364', 'step': 3894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:56.792957', 'step': 3894, 'epoch': 1} {'type': 'loss', 'content': 0.15705709159374237, 'timestamp': '2025-09-10 02:37:56.795150', 'step': 3895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:56.853744', 'step': 3895, 'epoch': 1} {'type': 'loss', 'content': 0.14593026041984558, 'timestamp': '2025-09-10 02:37:56.859928', 'step': 3896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:56.913432', 'step': 3896, 'epoch': 1} {'type': 'loss', 'content': 0.12330678850412369, 'timestamp': '2025-09-10 02:37:56.915587', 'step': 3897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:56.969255', 'step': 3897, 'epoch': 1} {'type': 'loss', 'content': 0.1790478229522705, 'timestamp': '2025-09-10 02:37:56.971726', 'step': 3898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:57.025887', 'step': 3898, 'epoch': 1} {'type': 'loss', 'content': 0.2398284524679184, 'timestamp': '2025-09-10 02:37:57.028082', 'step': 3899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:57.081412', 'step': 3899, 'epoch': 1} {'type': 'loss', 'content': 0.18232804536819458, 'timestamp': '2025-09-10 02:37:57.087859', 'step': 3900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:57.142552', 'step': 3900, 'epoch': 1} {'type': 'loss', 'content': 0.20410162210464478, 'timestamp': '2025-09-10 02:37:57.144795', 'step': 3901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:57.200359', 'step': 3901, 'epoch': 1} {'type': 'loss', 'content': 0.11195820569992065, 'timestamp': '2025-09-10 02:37:57.202624', 'step': 3902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:57.258466', 'step': 3902, 'epoch': 1} {'type': 'loss', 'content': 0.15394693613052368, 'timestamp': '2025-09-10 02:37:57.260730', 'step': 3903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:57.315200', 'step': 3903, 'epoch': 1} {'type': 'loss', 'content': 0.14477159082889557, 'timestamp': '2025-09-10 02:37:57.321459', 'step': 3904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:57.376237', 'step': 3904, 'epoch': 1} {'type': 'loss', 'content': 0.17882226407527924, 'timestamp': '2025-09-10 02:37:57.378512', 'step': 3905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:57.433736', 'step': 3905, 'epoch': 1} {'type': 'loss', 'content': 0.12984199821949005, 'timestamp': '2025-09-10 02:37:57.435935', 'step': 3906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:57.492195', 'step': 3906, 'epoch': 1} {'type': 'loss', 'content': 0.1847057342529297, 'timestamp': '2025-09-10 02:37:57.494662', 'step': 3907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:57.549696', 'step': 3907, 'epoch': 1} {'type': 'loss', 'content': 0.11913184076547623, 'timestamp': '2025-09-10 02:37:57.556270', 'step': 3908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:57.611016', 'step': 3908, 'epoch': 1} {'type': 'loss', 'content': 0.1956484168767929, 'timestamp': '2025-09-10 02:37:57.613389', 'step': 3909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:57.668849', 'step': 3909, 'epoch': 1} {'type': 'loss', 'content': 0.1908227503299713, 'timestamp': '2025-09-10 02:37:57.671115', 'step': 3910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:57.726000', 'step': 3910, 'epoch': 1} {'type': 'loss', 'content': 0.15780267119407654, 'timestamp': '2025-09-10 02:37:57.728243', 'step': 3911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:57.783388', 'step': 3911, 'epoch': 1} {'type': 'loss', 'content': 0.12945203483104706, 'timestamp': '2025-09-10 02:37:57.789710', 'step': 3912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:57.844173', 'step': 3912, 'epoch': 1} {'type': 'loss', 'content': 0.13718988001346588, 'timestamp': '2025-09-10 02:37:57.846148', 'step': 3913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:57.900487', 'step': 3913, 'epoch': 1} {'type': 'loss', 'content': 0.13396458327770233, 'timestamp': '2025-09-10 02:37:57.902989', 'step': 3914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:57.959844', 'step': 3914, 'epoch': 1} {'type': 'loss', 'content': 0.18928669393062592, 'timestamp': '2025-09-10 02:37:57.962446', 'step': 3915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:58.021542', 'step': 3915, 'epoch': 1} {'type': 'loss', 'content': 0.10849602520465851, 'timestamp': '2025-09-10 02:37:58.028353', 'step': 3916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:58.083145', 'step': 3916, 'epoch': 1} {'type': 'loss', 'content': 0.22112591564655304, 'timestamp': '2025-09-10 02:37:58.085402', 'step': 3917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:58.139170', 'step': 3917, 'epoch': 1} {'type': 'loss', 'content': 0.2681073248386383, 'timestamp': '2025-09-10 02:37:58.141409', 'step': 3918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:58.195060', 'step': 3918, 'epoch': 1} {'type': 'loss', 'content': 0.2481604665517807, 'timestamp': '2025-09-10 02:37:58.197285', 'step': 3919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:58.251457', 'step': 3919, 'epoch': 1} {'type': 'loss', 'content': 0.2184232920408249, 'timestamp': '2025-09-10 02:37:58.257420', 'step': 3920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:58.310196', 'step': 3920, 'epoch': 1} {'type': 'loss', 'content': 0.13342660665512085, 'timestamp': '2025-09-10 02:37:58.312443', 'step': 3921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:58.365409', 'step': 3921, 'epoch': 1} {'type': 'loss', 'content': 0.1923123002052307, 'timestamp': '2025-09-10 02:37:58.367664', 'step': 3922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:58.421106', 'step': 3922, 'epoch': 1} {'type': 'loss', 'content': 0.15063844621181488, 'timestamp': '2025-09-10 02:37:58.423324', 'step': 3923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:58.477171', 'step': 3923, 'epoch': 1} {'type': 'loss', 'content': 0.14487934112548828, 'timestamp': '2025-09-10 02:37:58.483382', 'step': 3924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:58.536991', 'step': 3924, 'epoch': 1} {'type': 'loss', 'content': 0.12005861848592758, 'timestamp': '2025-09-10 02:37:58.539365', 'step': 3925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:58.593585', 'step': 3925, 'epoch': 1} {'type': 'loss', 'content': 0.19783872365951538, 'timestamp': '2025-09-10 02:37:58.595775', 'step': 3926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:58.650361', 'step': 3926, 'epoch': 1} {'type': 'loss', 'content': 0.1467410773038864, 'timestamp': '2025-09-10 02:37:58.652475', 'step': 3927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:58.706385', 'step': 3927, 'epoch': 1} {'type': 'loss', 'content': 0.17844049632549286, 'timestamp': '2025-09-10 02:37:58.712720', 'step': 3928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:58.766462', 'step': 3928, 'epoch': 1} {'type': 'loss', 'content': 0.12769538164138794, 'timestamp': '2025-09-10 02:37:58.768836', 'step': 3929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:58.824779', 'step': 3929, 'epoch': 1} {'type': 'loss', 'content': 0.13345281779766083, 'timestamp': '2025-09-10 02:37:58.827313', 'step': 3930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:58.883377', 'step': 3930, 'epoch': 1} {'type': 'loss', 'content': 0.2488761991262436, 'timestamp': '2025-09-10 02:37:58.885674', 'step': 3931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:58.941144', 'step': 3931, 'epoch': 1} {'type': 'loss', 'content': 0.15596376359462738, 'timestamp': '2025-09-10 02:37:58.947652', 'step': 3932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:59.002991', 'step': 3932, 'epoch': 1} {'type': 'loss', 'content': 0.175221785902977, 'timestamp': '2025-09-10 02:37:59.005286', 'step': 3933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:59.062859', 'step': 3933, 'epoch': 1} {'type': 'loss', 'content': 0.1449236273765564, 'timestamp': '2025-09-10 02:37:59.065151', 'step': 3934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:59.120828', 'step': 3934, 'epoch': 1} {'type': 'loss', 'content': 0.20724615454673767, 'timestamp': '2025-09-10 02:37:59.122984', 'step': 3935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:59.177216', 'step': 3935, 'epoch': 1} {'type': 'loss', 'content': 0.10311123728752136, 'timestamp': '2025-09-10 02:37:59.183633', 'step': 3936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:59.238014', 'step': 3936, 'epoch': 1} {'type': 'loss', 'content': 0.16606374084949493, 'timestamp': '2025-09-10 02:37:59.240258', 'step': 3937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:59.294711', 'step': 3937, 'epoch': 1} {'type': 'loss', 'content': 0.1545669287443161, 'timestamp': '2025-09-10 02:37:59.296895', 'step': 3938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:59.351549', 'step': 3938, 'epoch': 1} {'type': 'loss', 'content': 0.20676182210445404, 'timestamp': '2025-09-10 02:37:59.353951', 'step': 3939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:37:59.409030', 'step': 3939, 'epoch': 1} {'type': 'loss', 'content': 0.1893371045589447, 'timestamp': '2025-09-10 02:37:59.415404', 'step': 3940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:59.470451', 'step': 3940, 'epoch': 1} {'type': 'loss', 'content': 0.20502138137817383, 'timestamp': '2025-09-10 02:37:59.472775', 'step': 3941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:59.529940', 'step': 3941, 'epoch': 1} {'type': 'loss', 'content': 0.22226457297801971, 'timestamp': '2025-09-10 02:37:59.532241', 'step': 3942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:59.592809', 'step': 3942, 'epoch': 1} {'type': 'loss', 'content': 0.2097272276878357, 'timestamp': '2025-09-10 02:37:59.595197', 'step': 3943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:59.652217', 'step': 3943, 'epoch': 1} {'type': 'loss', 'content': 0.1916569173336029, 'timestamp': '2025-09-10 02:37:59.658893', 'step': 3944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:37:59.722708', 'step': 3944, 'epoch': 1} {'type': 'loss', 'content': 0.20617824792861938, 'timestamp': '2025-09-10 02:37:59.724912', 'step': 3945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:59.779664', 'step': 3945, 'epoch': 1} {'type': 'loss', 'content': 0.23437844216823578, 'timestamp': '2025-09-10 02:37:59.781936', 'step': 3946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:59.840027', 'step': 3946, 'epoch': 1} {'type': 'loss', 'content': 0.17854030430316925, 'timestamp': '2025-09-10 02:37:59.842398', 'step': 3947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:37:59.897251', 'step': 3947, 'epoch': 1} {'type': 'loss', 'content': 0.2097463309764862, 'timestamp': '2025-09-10 02:37:59.903614', 'step': 3948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:59.957097', 'step': 3948, 'epoch': 1} {'type': 'loss', 'content': 0.14733731746673584, 'timestamp': '2025-09-10 02:37:59.959361', 'step': 3949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:00.017258', 'step': 3949, 'epoch': 1} {'type': 'loss', 'content': 0.15987572073936462, 'timestamp': '2025-09-10 02:38:00.019923', 'step': 3950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:00.074342', 'step': 3950, 'epoch': 1} {'type': 'loss', 'content': 0.302542507648468, 'timestamp': '2025-09-10 02:38:00.078122', 'step': 3951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:00.133495', 'step': 3951, 'epoch': 1} {'type': 'loss', 'content': 0.16126084327697754, 'timestamp': '2025-09-10 02:38:00.143206', 'step': 3952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:00.201877', 'step': 3952, 'epoch': 1} {'type': 'loss', 'content': 0.21586726605892181, 'timestamp': '2025-09-10 02:38:00.204111', 'step': 3953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:00.258390', 'step': 3953, 'epoch': 1} {'type': 'loss', 'content': 0.1545020341873169, 'timestamp': '2025-09-10 02:38:00.261935', 'step': 3954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:00.318121', 'step': 3954, 'epoch': 1} {'type': 'loss', 'content': 0.12946981191635132, 'timestamp': '2025-09-10 02:38:00.320362', 'step': 3955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:00.380228', 'step': 3955, 'epoch': 1} {'type': 'loss', 'content': 0.09739026427268982, 'timestamp': '2025-09-10 02:38:00.386486', 'step': 3956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:00.440917', 'step': 3956, 'epoch': 1} {'type': 'loss', 'content': 0.14966976642608643, 'timestamp': '2025-09-10 02:38:00.442882', 'step': 3957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:00.498600', 'step': 3957, 'epoch': 1} {'type': 'loss', 'content': 0.19866327941417694, 'timestamp': '2025-09-10 02:38:00.500873', 'step': 3958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:00.556536', 'step': 3958, 'epoch': 1} {'type': 'loss', 'content': 0.20796743035316467, 'timestamp': '2025-09-10 02:38:00.558882', 'step': 3959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:00.614707', 'step': 3959, 'epoch': 1} {'type': 'loss', 'content': 0.1163693517446518, 'timestamp': '2025-09-10 02:38:00.620647', 'step': 3960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:00.674297', 'step': 3960, 'epoch': 1} {'type': 'loss', 'content': 0.23134025931358337, 'timestamp': '2025-09-10 02:38:00.676478', 'step': 3961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:00.730403', 'step': 3961, 'epoch': 1} {'type': 'loss', 'content': 0.1644483357667923, 'timestamp': '2025-09-10 02:38:00.732643', 'step': 3962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:00.786211', 'step': 3962, 'epoch': 1} {'type': 'loss', 'content': 0.17614372074604034, 'timestamp': '2025-09-10 02:38:00.788215', 'step': 3963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:00.840937', 'step': 3963, 'epoch': 1} {'type': 'loss', 'content': 0.148691326379776, 'timestamp': '2025-09-10 02:38:00.846991', 'step': 3964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:00.900910', 'step': 3964, 'epoch': 1} {'type': 'loss', 'content': 0.1777971088886261, 'timestamp': '2025-09-10 02:38:00.902900', 'step': 3965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:00.956961', 'step': 3965, 'epoch': 1} {'type': 'loss', 'content': 0.22248521447181702, 'timestamp': '2025-09-10 02:38:00.959277', 'step': 3966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:01.012346', 'step': 3966, 'epoch': 1} {'type': 'loss', 'content': 0.09949615597724915, 'timestamp': '2025-09-10 02:38:01.014608', 'step': 3967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:01.067682', 'step': 3967, 'epoch': 1} {'type': 'loss', 'content': 0.1719718724489212, 'timestamp': '2025-09-10 02:38:01.073651', 'step': 3968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:01.126880', 'step': 3968, 'epoch': 1} {'type': 'loss', 'content': 0.27670618891716003, 'timestamp': '2025-09-10 02:38:01.128962', 'step': 3969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:01.191308', 'step': 3969, 'epoch': 1} {'type': 'loss', 'content': 0.1899564415216446, 'timestamp': '2025-09-10 02:38:01.193623', 'step': 3970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:01.248646', 'step': 3970, 'epoch': 1} {'type': 'loss', 'content': 0.1231815367937088, 'timestamp': '2025-09-10 02:38:01.250762', 'step': 3971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:01.304439', 'step': 3971, 'epoch': 1} {'type': 'loss', 'content': 0.1828995645046234, 'timestamp': '2025-09-10 02:38:01.310497', 'step': 3972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:01.367914', 'step': 3972, 'epoch': 1} {'type': 'loss', 'content': 0.23284423351287842, 'timestamp': '2025-09-10 02:38:01.370262', 'step': 3973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:01.443066', 'step': 3973, 'epoch': 1} {'type': 'loss', 'content': 0.17737717926502228, 'timestamp': '2025-09-10 02:38:01.446134', 'step': 3974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:01.503061', 'step': 3974, 'epoch': 1} {'type': 'loss', 'content': 0.22134606540203094, 'timestamp': '2025-09-10 02:38:01.505950', 'step': 3975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:01.559406', 'step': 3975, 'epoch': 1} {'type': 'loss', 'content': 0.1593785136938095, 'timestamp': '2025-09-10 02:38:01.566010', 'step': 3976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:01.620032', 'step': 3976, 'epoch': 1} {'type': 'loss', 'content': 0.16169799864292145, 'timestamp': '2025-09-10 02:38:01.622232', 'step': 3977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:01.675785', 'step': 3977, 'epoch': 1} {'type': 'loss', 'content': 0.15952523052692413, 'timestamp': '2025-09-10 02:38:01.678139', 'step': 3978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:01.733465', 'step': 3978, 'epoch': 1} {'type': 'loss', 'content': 0.16546934843063354, 'timestamp': '2025-09-10 02:38:01.735443', 'step': 3979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:01.788836', 'step': 3979, 'epoch': 1} {'type': 'loss', 'content': 0.2046358734369278, 'timestamp': '2025-09-10 02:38:01.795441', 'step': 3980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:01.849013', 'step': 3980, 'epoch': 1} {'type': 'loss', 'content': 0.10228867828845978, 'timestamp': '2025-09-10 02:38:01.854555', 'step': 3981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:01.908115', 'step': 3981, 'epoch': 1} {'type': 'loss', 'content': 0.15148422122001648, 'timestamp': '2025-09-10 02:38:01.910780', 'step': 3982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:01.966908', 'step': 3982, 'epoch': 1} {'type': 'loss', 'content': 0.15943288803100586, 'timestamp': '2025-09-10 02:38:01.973960', 'step': 3983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:02.029774', 'step': 3983, 'epoch': 1} {'type': 'loss', 'content': 0.09643027931451797, 'timestamp': '2025-09-10 02:38:02.035794', 'step': 3984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:02.089511', 'step': 3984, 'epoch': 1} {'type': 'loss', 'content': 0.14526620507240295, 'timestamp': '2025-09-10 02:38:02.091970', 'step': 3985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:02.146346', 'step': 3985, 'epoch': 1} {'type': 'loss', 'content': 0.1533309370279312, 'timestamp': '2025-09-10 02:38:02.150535', 'step': 3986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:02.204324', 'step': 3986, 'epoch': 1} {'type': 'loss', 'content': 0.13036338984966278, 'timestamp': '2025-09-10 02:38:02.211696', 'step': 3987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:02.272362', 'step': 3987, 'epoch': 1} {'type': 'loss', 'content': 0.09208916127681732, 'timestamp': '2025-09-10 02:38:02.278908', 'step': 3988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:02.335516', 'step': 3988, 'epoch': 1} {'type': 'loss', 'content': 0.16332386434078217, 'timestamp': '2025-09-10 02:38:02.337702', 'step': 3989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:02.395975', 'step': 3989, 'epoch': 1} {'type': 'loss', 'content': 0.13822120428085327, 'timestamp': '2025-09-10 02:38:02.397969', 'step': 3990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:02.452259', 'step': 3990, 'epoch': 1} {'type': 'loss', 'content': 0.19425716996192932, 'timestamp': '2025-09-10 02:38:02.454315', 'step': 3991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:02.507697', 'step': 3991, 'epoch': 1} {'type': 'loss', 'content': 0.15940405428409576, 'timestamp': '2025-09-10 02:38:02.513738', 'step': 3992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:02.567435', 'step': 3992, 'epoch': 1} {'type': 'loss', 'content': 0.12188518047332764, 'timestamp': '2025-09-10 02:38:02.569576', 'step': 3993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:02.623869', 'step': 3993, 'epoch': 1} {'type': 'loss', 'content': 0.1546451300382614, 'timestamp': '2025-09-10 02:38:02.626174', 'step': 3994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:02.683467', 'step': 3994, 'epoch': 1} {'type': 'loss', 'content': 0.1516350358724594, 'timestamp': '2025-09-10 02:38:02.685508', 'step': 3995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:02.739767', 'step': 3995, 'epoch': 1} {'type': 'loss', 'content': 0.1981443613767624, 'timestamp': '2025-09-10 02:38:02.746347', 'step': 3996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:02.800099', 'step': 3996, 'epoch': 1} {'type': 'loss', 'content': 0.16340914368629456, 'timestamp': '2025-09-10 02:38:02.803032', 'step': 3997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:02.857227', 'step': 3997, 'epoch': 1} {'type': 'loss', 'content': 0.2466089278459549, 'timestamp': '2025-09-10 02:38:02.862643', 'step': 3998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:02.918112', 'step': 3998, 'epoch': 1} {'type': 'loss', 'content': 0.14000557363033295, 'timestamp': '2025-09-10 02:38:02.920122', 'step': 3999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:02.973590', 'step': 3999, 'epoch': 1} {'type': 'loss', 'content': 0.22193729877471924, 'timestamp': '2025-09-10 02:38:02.979711', 'step': 4000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 4000', 'timestamp': '2025-09-10 02:38:03.422780', 'step': 4000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:03.480551', 'step': 4000, 'epoch': 1} {'type': 'loss', 'content': 0.16470648348331451, 'timestamp': '2025-09-10 02:38:03.482895', 'step': 4001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:03.541447', 'step': 4001, 'epoch': 1} {'type': 'loss', 'content': 0.13950064778327942, 'timestamp': '2025-09-10 02:38:03.546002', 'step': 4002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:03.608186', 'step': 4002, 'epoch': 1} {'type': 'loss', 'content': 0.15473410487174988, 'timestamp': '2025-09-10 02:38:03.612218', 'step': 4003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:03.666686', 'step': 4003, 'epoch': 1} {'type': 'loss', 'content': 0.2067851573228836, 'timestamp': '2025-09-10 02:38:03.672760', 'step': 4004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:03.728752', 'step': 4004, 'epoch': 1} {'type': 'loss', 'content': 0.20573747158050537, 'timestamp': '2025-09-10 02:38:03.730735', 'step': 4005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:03.794355', 'step': 4005, 'epoch': 1} {'type': 'loss', 'content': 0.21183842420578003, 'timestamp': '2025-09-10 02:38:03.796617', 'step': 4006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:03.852170', 'step': 4006, 'epoch': 1} {'type': 'loss', 'content': 0.16414199769496918, 'timestamp': '2025-09-10 02:38:03.854303', 'step': 4007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:03.909952', 'step': 4007, 'epoch': 1} {'type': 'loss', 'content': 0.14623278379440308, 'timestamp': '2025-09-10 02:38:03.916417', 'step': 4008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:03.970850', 'step': 4008, 'epoch': 1} {'type': 'loss', 'content': 0.23020806908607483, 'timestamp': '2025-09-10 02:38:03.972972', 'step': 4009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:04.033195', 'step': 4009, 'epoch': 1} {'type': 'loss', 'content': 0.12381339073181152, 'timestamp': '2025-09-10 02:38:04.035240', 'step': 4010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:04.094353', 'step': 4010, 'epoch': 1} {'type': 'loss', 'content': 0.2209814190864563, 'timestamp': '2025-09-10 02:38:04.096380', 'step': 4011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:04.165449', 'step': 4011, 'epoch': 1} {'type': 'loss', 'content': 0.13768650591373444, 'timestamp': '2025-09-10 02:38:04.172122', 'step': 4012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:04.228986', 'step': 4012, 'epoch': 1} {'type': 'loss', 'content': 0.2709349989891052, 'timestamp': '2025-09-10 02:38:04.231045', 'step': 4013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:04.284242', 'step': 4013, 'epoch': 1} {'type': 'loss', 'content': 0.15364331007003784, 'timestamp': '2025-09-10 02:38:04.288536', 'step': 4014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:04.342376', 'step': 4014, 'epoch': 1} {'type': 'loss', 'content': 0.16812360286712646, 'timestamp': '2025-09-10 02:38:04.344512', 'step': 4015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:04.401753', 'step': 4015, 'epoch': 1} {'type': 'loss', 'content': 0.2092745006084442, 'timestamp': '2025-09-10 02:38:04.408722', 'step': 4016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:04.463859', 'step': 4016, 'epoch': 1} {'type': 'loss', 'content': 0.12060105800628662, 'timestamp': '2025-09-10 02:38:04.466078', 'step': 4017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:04.522238', 'step': 4017, 'epoch': 1} {'type': 'loss', 'content': 0.15960803627967834, 'timestamp': '2025-09-10 02:38:04.524233', 'step': 4018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:04.579251', 'step': 4018, 'epoch': 1} {'type': 'loss', 'content': 0.17846094071865082, 'timestamp': '2025-09-10 02:38:04.581205', 'step': 4019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:04.635778', 'step': 4019, 'epoch': 1} {'type': 'loss', 'content': 0.10744044929742813, 'timestamp': '2025-09-10 02:38:04.641898', 'step': 4020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:04.695132', 'step': 4020, 'epoch': 1} {'type': 'loss', 'content': 0.15887580811977386, 'timestamp': '2025-09-10 02:38:04.697237', 'step': 4021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:04.750821', 'step': 4021, 'epoch': 1} {'type': 'loss', 'content': 0.31230682134628296, 'timestamp': '2025-09-10 02:38:04.752860', 'step': 4022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:04.808953', 'step': 4022, 'epoch': 1} {'type': 'loss', 'content': 0.12487181276082993, 'timestamp': '2025-09-10 02:38:04.811110', 'step': 4023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:04.865418', 'step': 4023, 'epoch': 1} {'type': 'loss', 'content': 0.19515815377235413, 'timestamp': '2025-09-10 02:38:04.871488', 'step': 4024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:04.927684', 'step': 4024, 'epoch': 1} {'type': 'loss', 'content': 0.13032716512680054, 'timestamp': '2025-09-10 02:38:04.929850', 'step': 4025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:04.984933', 'step': 4025, 'epoch': 1} {'type': 'loss', 'content': 0.15073111653327942, 'timestamp': '2025-09-10 02:38:04.986976', 'step': 4026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:05.041857', 'step': 4026, 'epoch': 1} {'type': 'loss', 'content': 0.16264325380325317, 'timestamp': '2025-09-10 02:38:05.044261', 'step': 4027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:05.099412', 'step': 4027, 'epoch': 1} {'type': 'loss', 'content': 0.17873376607894897, 'timestamp': '2025-09-10 02:38:05.105623', 'step': 4028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:05.159691', 'step': 4028, 'epoch': 1} {'type': 'loss', 'content': 0.1961842179298401, 'timestamp': '2025-09-10 02:38:05.161740', 'step': 4029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:05.220639', 'step': 4029, 'epoch': 1} {'type': 'loss', 'content': 0.16849680244922638, 'timestamp': '2025-09-10 02:38:05.222901', 'step': 4030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:05.278202', 'step': 4030, 'epoch': 1} {'type': 'loss', 'content': 0.17025697231292725, 'timestamp': '2025-09-10 02:38:05.280370', 'step': 4031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:05.334030', 'step': 4031, 'epoch': 1} {'type': 'loss', 'content': 0.1806362420320511, 'timestamp': '2025-09-10 02:38:05.340174', 'step': 4032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:05.396342', 'step': 4032, 'epoch': 1} {'type': 'loss', 'content': 0.17468935251235962, 'timestamp': '2025-09-10 02:38:05.400514', 'step': 4033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:05.456675', 'step': 4033, 'epoch': 1} {'type': 'loss', 'content': 0.18553264439105988, 'timestamp': '2025-09-10 02:38:05.458832', 'step': 4034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:05.515019', 'step': 4034, 'epoch': 1} {'type': 'loss', 'content': 0.1837720423936844, 'timestamp': '2025-09-10 02:38:05.517396', 'step': 4035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:05.573330', 'step': 4035, 'epoch': 1} {'type': 'loss', 'content': 0.23279191553592682, 'timestamp': '2025-09-10 02:38:05.579857', 'step': 4036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:05.636076', 'step': 4036, 'epoch': 1} {'type': 'loss', 'content': 0.18578468263149261, 'timestamp': '2025-09-10 02:38:05.638388', 'step': 4037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:05.692790', 'step': 4037, 'epoch': 1} {'type': 'loss', 'content': 0.19026100635528564, 'timestamp': '2025-09-10 02:38:05.694848', 'step': 4038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:05.750241', 'step': 4038, 'epoch': 1} {'type': 'loss', 'content': 0.13803774118423462, 'timestamp': '2025-09-10 02:38:05.752592', 'step': 4039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:05.807386', 'step': 4039, 'epoch': 1} {'type': 'loss', 'content': 0.08292623609304428, 'timestamp': '2025-09-10 02:38:05.814032', 'step': 4040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:05.868623', 'step': 4040, 'epoch': 1} {'type': 'loss', 'content': 0.1577378511428833, 'timestamp': '2025-09-10 02:38:05.870946', 'step': 4041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:05.925615', 'step': 4041, 'epoch': 1} {'type': 'loss', 'content': 0.14034347236156464, 'timestamp': '2025-09-10 02:38:05.927881', 'step': 4042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:05.983020', 'step': 4042, 'epoch': 1} {'type': 'loss', 'content': 0.20578593015670776, 'timestamp': '2025-09-10 02:38:05.985138', 'step': 4043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:06.041617', 'step': 4043, 'epoch': 1} {'type': 'loss', 'content': 0.1534425914287567, 'timestamp': '2025-09-10 02:38:06.048234', 'step': 4044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:06.103915', 'step': 4044, 'epoch': 1} {'type': 'loss', 'content': 0.15978704392910004, 'timestamp': '2025-09-10 02:38:06.106339', 'step': 4045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:06.161407', 'step': 4045, 'epoch': 1} {'type': 'loss', 'content': 0.1262340545654297, 'timestamp': '2025-09-10 02:38:06.163310', 'step': 4046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:06.217542', 'step': 4046, 'epoch': 1} {'type': 'loss', 'content': 0.15647487342357635, 'timestamp': '2025-09-10 02:38:06.219328', 'step': 4047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:06.273122', 'step': 4047, 'epoch': 1} {'type': 'loss', 'content': 0.22591137886047363, 'timestamp': '2025-09-10 02:38:06.279427', 'step': 4048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:06.334300', 'step': 4048, 'epoch': 1} {'type': 'loss', 'content': 0.13921824097633362, 'timestamp': '2025-09-10 02:38:06.336553', 'step': 4049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:06.393409', 'step': 4049, 'epoch': 1} {'type': 'loss', 'content': 0.21916569769382477, 'timestamp': '2025-09-10 02:38:06.396088', 'step': 4050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:06.453057', 'step': 4050, 'epoch': 1} {'type': 'loss', 'content': 0.17023997008800507, 'timestamp': '2025-09-10 02:38:06.455707', 'step': 4051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:06.511244', 'step': 4051, 'epoch': 1} {'type': 'loss', 'content': 0.18615348637104034, 'timestamp': '2025-09-10 02:38:06.517904', 'step': 4052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:06.575424', 'step': 4052, 'epoch': 1} {'type': 'loss', 'content': 0.08376047760248184, 'timestamp': '2025-09-10 02:38:06.577679', 'step': 4053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:06.633934', 'step': 4053, 'epoch': 1} {'type': 'loss', 'content': 0.2352760136127472, 'timestamp': '2025-09-10 02:38:06.636156', 'step': 4054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:06.691919', 'step': 4054, 'epoch': 1} {'type': 'loss', 'content': 0.16099195182323456, 'timestamp': '2025-09-10 02:38:06.694104', 'step': 4055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:06.749417', 'step': 4055, 'epoch': 1} {'type': 'loss', 'content': 0.1567154973745346, 'timestamp': '2025-09-10 02:38:06.755746', 'step': 4056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:06.809959', 'step': 4056, 'epoch': 1} {'type': 'loss', 'content': 0.21814556419849396, 'timestamp': '2025-09-10 02:38:06.812297', 'step': 4057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:06.866667', 'step': 4057, 'epoch': 1} {'type': 'loss', 'content': 0.21776701509952545, 'timestamp': '2025-09-10 02:38:06.868723', 'step': 4058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:06.923699', 'step': 4058, 'epoch': 1} {'type': 'loss', 'content': 0.18574641644954681, 'timestamp': '2025-09-10 02:38:06.926170', 'step': 4059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:06.981361', 'step': 4059, 'epoch': 1} {'type': 'loss', 'content': 0.20945163071155548, 'timestamp': '2025-09-10 02:38:06.987707', 'step': 4060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:07.042942', 'step': 4060, 'epoch': 1} {'type': 'loss', 'content': 0.2632828950881958, 'timestamp': '2025-09-10 02:38:07.045161', 'step': 4061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:07.102182', 'step': 4061, 'epoch': 1} {'type': 'loss', 'content': 0.1744033247232437, 'timestamp': '2025-09-10 02:38:07.104562', 'step': 4062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:07.161249', 'step': 4062, 'epoch': 1} {'type': 'loss', 'content': 0.20962001383304596, 'timestamp': '2025-09-10 02:38:07.163349', 'step': 4063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:07.218852', 'step': 4063, 'epoch': 1} {'type': 'loss', 'content': 0.24757592380046844, 'timestamp': '2025-09-10 02:38:07.225333', 'step': 4064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:07.285974', 'step': 4064, 'epoch': 1} {'type': 'loss', 'content': 0.1593959927558899, 'timestamp': '2025-09-10 02:38:07.288945', 'step': 4065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:07.344401', 'step': 4065, 'epoch': 1} {'type': 'loss', 'content': 0.15069769322872162, 'timestamp': '2025-09-10 02:38:07.346722', 'step': 4066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:07.406229', 'step': 4066, 'epoch': 1} {'type': 'loss', 'content': 0.22253809869289398, 'timestamp': '2025-09-10 02:38:07.409035', 'step': 4067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:07.464085', 'step': 4067, 'epoch': 1} {'type': 'loss', 'content': 0.12059096246957779, 'timestamp': '2025-09-10 02:38:07.470536', 'step': 4068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:07.525338', 'step': 4068, 'epoch': 1} {'type': 'loss', 'content': 0.17688100039958954, 'timestamp': '2025-09-10 02:38:07.527594', 'step': 4069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:07.584189', 'step': 4069, 'epoch': 1} {'type': 'loss', 'content': 0.15537188947200775, 'timestamp': '2025-09-10 02:38:07.586549', 'step': 4070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:07.643429', 'step': 4070, 'epoch': 1} {'type': 'loss', 'content': 0.10192187130451202, 'timestamp': '2025-09-10 02:38:07.645664', 'step': 4071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:07.701453', 'step': 4071, 'epoch': 1} {'type': 'loss', 'content': 0.16381794214248657, 'timestamp': '2025-09-10 02:38:07.708045', 'step': 4072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:07.762749', 'step': 4072, 'epoch': 1} {'type': 'loss', 'content': 0.256020188331604, 'timestamp': '2025-09-10 02:38:07.764988', 'step': 4073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:07.819957', 'step': 4073, 'epoch': 1} {'type': 'loss', 'content': 0.08571162819862366, 'timestamp': '2025-09-10 02:38:07.822359', 'step': 4074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:07.877887', 'step': 4074, 'epoch': 1} {'type': 'loss', 'content': 0.16605576872825623, 'timestamp': '2025-09-10 02:38:07.880256', 'step': 4075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:07.944279', 'step': 4075, 'epoch': 1} {'type': 'loss', 'content': 0.17889079451560974, 'timestamp': '2025-09-10 02:38:07.950832', 'step': 4076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:08.005419', 'step': 4076, 'epoch': 1} {'type': 'loss', 'content': 0.18038813769817352, 'timestamp': '2025-09-10 02:38:08.007652', 'step': 4077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:08.062749', 'step': 4077, 'epoch': 1} {'type': 'loss', 'content': 0.14647163450717926, 'timestamp': '2025-09-10 02:38:08.064772', 'step': 4078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:08.121412', 'step': 4078, 'epoch': 1} {'type': 'loss', 'content': 0.11727321892976761, 'timestamp': '2025-09-10 02:38:08.125601', 'step': 4079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:08.182417', 'step': 4079, 'epoch': 1} {'type': 'loss', 'content': 0.260631263256073, 'timestamp': '2025-09-10 02:38:08.189055', 'step': 4080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:08.252143', 'step': 4080, 'epoch': 1} {'type': 'loss', 'content': 0.16203060746192932, 'timestamp': '2025-09-10 02:38:08.254362', 'step': 4081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:08.311591', 'step': 4081, 'epoch': 1} {'type': 'loss', 'content': 0.202874094247818, 'timestamp': '2025-09-10 02:38:08.313901', 'step': 4082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:08.369558', 'step': 4082, 'epoch': 1} {'type': 'loss', 'content': 0.12460014224052429, 'timestamp': '2025-09-10 02:38:08.371619', 'step': 4083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:08.428867', 'step': 4083, 'epoch': 1} {'type': 'loss', 'content': 0.18379169702529907, 'timestamp': '2025-09-10 02:38:08.435131', 'step': 4084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:08.489634', 'step': 4084, 'epoch': 1} {'type': 'loss', 'content': 0.2063678503036499, 'timestamp': '2025-09-10 02:38:08.493147', 'step': 4085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:08.548657', 'step': 4085, 'epoch': 1} {'type': 'loss', 'content': 0.13680949807167053, 'timestamp': '2025-09-10 02:38:08.550701', 'step': 4086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:08.608115', 'step': 4086, 'epoch': 1} {'type': 'loss', 'content': 0.16673825681209564, 'timestamp': '2025-09-10 02:38:08.610854', 'step': 4087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:08.668549', 'step': 4087, 'epoch': 1} {'type': 'loss', 'content': 0.1901138871908188, 'timestamp': '2025-09-10 02:38:08.675123', 'step': 4088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:08.730745', 'step': 4088, 'epoch': 1} {'type': 'loss', 'content': 0.10097214579582214, 'timestamp': '2025-09-10 02:38:08.733152', 'step': 4089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:08.793276', 'step': 4089, 'epoch': 1} {'type': 'loss', 'content': 0.055307306349277496, 'timestamp': '2025-09-10 02:38:08.795571', 'step': 4090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:08.861922', 'step': 4090, 'epoch': 1} {'type': 'loss', 'content': 0.12026933580636978, 'timestamp': '2025-09-10 02:38:08.864198', 'step': 4091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:08.928129', 'step': 4091, 'epoch': 1} {'type': 'loss', 'content': 0.12250806391239166, 'timestamp': '2025-09-10 02:38:08.934519', 'step': 4092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:08.989114', 'step': 4092, 'epoch': 1} {'type': 'loss', 'content': 0.16470491886138916, 'timestamp': '2025-09-10 02:38:08.991210', 'step': 4093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:09.046461', 'step': 4093, 'epoch': 1} {'type': 'loss', 'content': 0.12788070738315582, 'timestamp': '2025-09-10 02:38:09.048497', 'step': 4094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:09.112783', 'step': 4094, 'epoch': 1} {'type': 'loss', 'content': 0.17544330656528473, 'timestamp': '2025-09-10 02:38:09.116704', 'step': 4095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:09.173957', 'step': 4095, 'epoch': 1} {'type': 'loss', 'content': 0.21212460100650787, 'timestamp': '2025-09-10 02:38:09.180518', 'step': 4096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:09.241100', 'step': 4096, 'epoch': 1} {'type': 'loss', 'content': 0.13371428847312927, 'timestamp': '2025-09-10 02:38:09.243574', 'step': 4097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:09.299873', 'step': 4097, 'epoch': 1} {'type': 'loss', 'content': 0.1485687494277954, 'timestamp': '2025-09-10 02:38:09.302256', 'step': 4098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:09.363706', 'step': 4098, 'epoch': 1} {'type': 'loss', 'content': 0.23905859887599945, 'timestamp': '2025-09-10 02:38:09.369426', 'step': 4099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:09.425307', 'step': 4099, 'epoch': 1} {'type': 'loss', 'content': 0.16073523461818695, 'timestamp': '2025-09-10 02:38:09.431705', 'step': 4100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:09.486449', 'step': 4100, 'epoch': 1} {'type': 'loss', 'content': 0.18588584661483765, 'timestamp': '2025-09-10 02:38:09.488647', 'step': 4101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:09.544060', 'step': 4101, 'epoch': 1} {'type': 'loss', 'content': 0.1503216028213501, 'timestamp': '2025-09-10 02:38:09.546283', 'step': 4102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:09.603744', 'step': 4102, 'epoch': 1} {'type': 'loss', 'content': 0.2553652226924896, 'timestamp': '2025-09-10 02:38:09.606346', 'step': 4103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:09.663507', 'step': 4103, 'epoch': 1} {'type': 'loss', 'content': 0.13091441988945007, 'timestamp': '2025-09-10 02:38:09.669937', 'step': 4104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:09.726739', 'step': 4104, 'epoch': 1} {'type': 'loss', 'content': 0.08805477619171143, 'timestamp': '2025-09-10 02:38:09.729118', 'step': 4105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:09.785025', 'step': 4105, 'epoch': 1} {'type': 'loss', 'content': 0.1705373227596283, 'timestamp': '2025-09-10 02:38:09.787458', 'step': 4106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:09.844033', 'step': 4106, 'epoch': 1} {'type': 'loss', 'content': 0.2105759084224701, 'timestamp': '2025-09-10 02:38:09.846323', 'step': 4107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:09.902019', 'step': 4107, 'epoch': 1} {'type': 'loss', 'content': 0.1895967423915863, 'timestamp': '2025-09-10 02:38:09.908856', 'step': 4108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:09.972177', 'step': 4108, 'epoch': 1} {'type': 'loss', 'content': 0.17657746374607086, 'timestamp': '2025-09-10 02:38:09.974197', 'step': 4109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:10.029921', 'step': 4109, 'epoch': 1} {'type': 'loss', 'content': 0.13020923733711243, 'timestamp': '2025-09-10 02:38:10.032285', 'step': 4110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:10.087906', 'step': 4110, 'epoch': 1} {'type': 'loss', 'content': 0.2960221767425537, 'timestamp': '2025-09-10 02:38:10.090300', 'step': 4111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:10.145220', 'step': 4111, 'epoch': 1} {'type': 'loss', 'content': 0.12198405712842941, 'timestamp': '2025-09-10 02:38:10.151601', 'step': 4112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:10.206374', 'step': 4112, 'epoch': 1} {'type': 'loss', 'content': 0.18916566669940948, 'timestamp': '2025-09-10 02:38:10.208674', 'step': 4113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:10.264344', 'step': 4113, 'epoch': 1} {'type': 'loss', 'content': 0.1477688103914261, 'timestamp': '2025-09-10 02:38:10.268117', 'step': 4114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:10.325807', 'step': 4114, 'epoch': 1} {'type': 'loss', 'content': 0.1260242611169815, 'timestamp': '2025-09-10 02:38:10.328050', 'step': 4115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:10.383843', 'step': 4115, 'epoch': 1} {'type': 'loss', 'content': 0.08024482429027557, 'timestamp': '2025-09-10 02:38:10.390445', 'step': 4116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:10.445763', 'step': 4116, 'epoch': 1} {'type': 'loss', 'content': 0.15091286599636078, 'timestamp': '2025-09-10 02:38:10.447937', 'step': 4117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:10.503510', 'step': 4117, 'epoch': 1} {'type': 'loss', 'content': 0.13446387648582458, 'timestamp': '2025-09-10 02:38:10.505978', 'step': 4118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:10.561795', 'step': 4118, 'epoch': 1} {'type': 'loss', 'content': 0.19781696796417236, 'timestamp': '2025-09-10 02:38:10.564099', 'step': 4119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:10.618565', 'step': 4119, 'epoch': 1} {'type': 'loss', 'content': 0.1630074828863144, 'timestamp': '2025-09-10 02:38:10.626568', 'step': 4120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:10.681192', 'step': 4120, 'epoch': 1} {'type': 'loss', 'content': 0.21291624009609222, 'timestamp': '2025-09-10 02:38:10.683503', 'step': 4121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:10.738824', 'step': 4121, 'epoch': 1} {'type': 'loss', 'content': 0.14745430648326874, 'timestamp': '2025-09-10 02:38:10.741072', 'step': 4122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:10.797480', 'step': 4122, 'epoch': 1} {'type': 'loss', 'content': 0.23689426481723785, 'timestamp': '2025-09-10 02:38:10.799709', 'step': 4123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:10.855878', 'step': 4123, 'epoch': 1} {'type': 'loss', 'content': 0.16927483677864075, 'timestamp': '2025-09-10 02:38:10.862496', 'step': 4124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:10.917107', 'step': 4124, 'epoch': 1} {'type': 'loss', 'content': 0.19908013939857483, 'timestamp': '2025-09-10 02:38:10.919530', 'step': 4125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:10.974718', 'step': 4125, 'epoch': 1} {'type': 'loss', 'content': 0.23140937089920044, 'timestamp': '2025-09-10 02:38:10.976762', 'step': 4126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:11.032351', 'step': 4126, 'epoch': 1} {'type': 'loss', 'content': 0.1221768707036972, 'timestamp': '2025-09-10 02:38:11.034698', 'step': 4127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:11.089613', 'step': 4127, 'epoch': 1} {'type': 'loss', 'content': 0.13437210023403168, 'timestamp': '2025-09-10 02:38:11.096189', 'step': 4128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:11.153461', 'step': 4128, 'epoch': 1} {'type': 'loss', 'content': 0.09129895269870758, 'timestamp': '2025-09-10 02:38:11.155647', 'step': 4129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:11.211189', 'step': 4129, 'epoch': 1} {'type': 'loss', 'content': 0.17783129215240479, 'timestamp': '2025-09-10 02:38:11.213533', 'step': 4130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:11.269920', 'step': 4130, 'epoch': 1} {'type': 'loss', 'content': 0.22279003262519836, 'timestamp': '2025-09-10 02:38:11.275916', 'step': 4131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:11.332608', 'step': 4131, 'epoch': 1} {'type': 'loss', 'content': 0.13232795894145966, 'timestamp': '2025-09-10 02:38:11.339357', 'step': 4132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:11.395113', 'step': 4132, 'epoch': 1} {'type': 'loss', 'content': 0.2258724421262741, 'timestamp': '2025-09-10 02:38:11.398684', 'step': 4133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:11.454175', 'step': 4133, 'epoch': 1} {'type': 'loss', 'content': 0.2047417163848877, 'timestamp': '2025-09-10 02:38:11.456193', 'step': 4134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:11.513993', 'step': 4134, 'epoch': 1} {'type': 'loss', 'content': 0.17325995862483978, 'timestamp': '2025-09-10 02:38:11.524787', 'step': 4135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:11.582082', 'step': 4135, 'epoch': 1} {'type': 'loss', 'content': 0.17077241837978363, 'timestamp': '2025-09-10 02:38:11.591891', 'step': 4136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:11.649446', 'step': 4136, 'epoch': 1} {'type': 'loss', 'content': 0.1007767766714096, 'timestamp': '2025-09-10 02:38:11.651727', 'step': 4137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:11.707160', 'step': 4137, 'epoch': 1} {'type': 'loss', 'content': 0.20265212655067444, 'timestamp': '2025-09-10 02:38:11.717011', 'step': 4138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:11.776260', 'step': 4138, 'epoch': 1} {'type': 'loss', 'content': 0.18307025730609894, 'timestamp': '2025-09-10 02:38:11.778509', 'step': 4139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:11.833809', 'step': 4139, 'epoch': 1} {'type': 'loss', 'content': 0.12971143424510956, 'timestamp': '2025-09-10 02:38:11.841020', 'step': 4140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:11.903416', 'step': 4140, 'epoch': 1} {'type': 'loss', 'content': 0.1349358707666397, 'timestamp': '2025-09-10 02:38:11.906852', 'step': 4141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:11.966737', 'step': 4141, 'epoch': 1} {'type': 'loss', 'content': 0.09535226970911026, 'timestamp': '2025-09-10 02:38:11.969109', 'step': 4142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:12.031164', 'step': 4142, 'epoch': 1} {'type': 'loss', 'content': 0.1462002396583557, 'timestamp': '2025-09-10 02:38:12.033707', 'step': 4143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:12.089390', 'step': 4143, 'epoch': 1} {'type': 'loss', 'content': 0.1845846176147461, 'timestamp': '2025-09-10 02:38:12.096405', 'step': 4144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:12.157934', 'step': 4144, 'epoch': 1} {'type': 'loss', 'content': 0.1756369024515152, 'timestamp': '2025-09-10 02:38:12.160163', 'step': 4145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:12.216552', 'step': 4145, 'epoch': 1} {'type': 'loss', 'content': 0.2071438729763031, 'timestamp': '2025-09-10 02:38:12.219093', 'step': 4146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:12.279103', 'step': 4146, 'epoch': 1} {'type': 'loss', 'content': 0.20630227029323578, 'timestamp': '2025-09-10 02:38:12.282994', 'step': 4147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:12.340079', 'step': 4147, 'epoch': 1} {'type': 'loss', 'content': 0.11253169178962708, 'timestamp': '2025-09-10 02:38:12.346249', 'step': 4148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:12.401859', 'step': 4148, 'epoch': 1} {'type': 'loss', 'content': 0.08320990204811096, 'timestamp': '2025-09-10 02:38:12.403886', 'step': 4149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:12.459919', 'step': 4149, 'epoch': 1} {'type': 'loss', 'content': 0.21152138710021973, 'timestamp': '2025-09-10 02:38:12.462196', 'step': 4150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:12.518965', 'step': 4150, 'epoch': 1} {'type': 'loss', 'content': 0.16645672917366028, 'timestamp': '2025-09-10 02:38:12.520951', 'step': 4151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:12.580515', 'step': 4151, 'epoch': 1} {'type': 'loss', 'content': 0.09840870648622513, 'timestamp': '2025-09-10 02:38:12.586825', 'step': 4152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:12.642019', 'step': 4152, 'epoch': 1} {'type': 'loss', 'content': 0.18147902190685272, 'timestamp': '2025-09-10 02:38:12.644143', 'step': 4153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:12.698970', 'step': 4153, 'epoch': 1} {'type': 'loss', 'content': 0.2041713148355484, 'timestamp': '2025-09-10 02:38:12.701044', 'step': 4154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:12.756302', 'step': 4154, 'epoch': 1} {'type': 'loss', 'content': 0.0795927420258522, 'timestamp': '2025-09-10 02:38:12.758335', 'step': 4155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:12.813076', 'step': 4155, 'epoch': 1} {'type': 'loss', 'content': 0.17000631988048553, 'timestamp': '2025-09-10 02:38:12.819298', 'step': 4156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:12.873134', 'step': 4156, 'epoch': 1} {'type': 'loss', 'content': 0.2118203192949295, 'timestamp': '2025-09-10 02:38:12.875164', 'step': 4157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:12.930459', 'step': 4157, 'epoch': 1} {'type': 'loss', 'content': 0.15943065285682678, 'timestamp': '2025-09-10 02:38:12.932717', 'step': 4158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:12.989321', 'step': 4158, 'epoch': 1} {'type': 'loss', 'content': 0.19885997474193573, 'timestamp': '2025-09-10 02:38:12.991781', 'step': 4159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:13.048292', 'step': 4159, 'epoch': 1} {'type': 'loss', 'content': 0.22353346645832062, 'timestamp': '2025-09-10 02:38:13.054990', 'step': 4160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:13.109365', 'step': 4160, 'epoch': 1} {'type': 'loss', 'content': 0.13890154659748077, 'timestamp': '2025-09-10 02:38:13.111730', 'step': 4161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:13.168957', 'step': 4161, 'epoch': 1} {'type': 'loss', 'content': 0.19449760019779205, 'timestamp': '2025-09-10 02:38:13.171136', 'step': 4162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:13.226170', 'step': 4162, 'epoch': 1} {'type': 'loss', 'content': 0.13489189743995667, 'timestamp': '2025-09-10 02:38:13.228381', 'step': 4163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:13.283352', 'step': 4163, 'epoch': 1} {'type': 'loss', 'content': 0.09814481437206268, 'timestamp': '2025-09-10 02:38:13.289611', 'step': 4164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:13.343784', 'step': 4164, 'epoch': 1} {'type': 'loss', 'content': 0.1465628296136856, 'timestamp': '2025-09-10 02:38:13.345962', 'step': 4165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:13.401394', 'step': 4165, 'epoch': 1} {'type': 'loss', 'content': 0.15442867577075958, 'timestamp': '2025-09-10 02:38:13.403676', 'step': 4166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:13.459696', 'step': 4166, 'epoch': 1} {'type': 'loss', 'content': 0.15616004168987274, 'timestamp': '2025-09-10 02:38:13.462046', 'step': 4167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:13.518168', 'step': 4167, 'epoch': 1} {'type': 'loss', 'content': 0.12038880586624146, 'timestamp': '2025-09-10 02:38:13.524873', 'step': 4168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:13.582806', 'step': 4168, 'epoch': 1} {'type': 'loss', 'content': 0.11570286005735397, 'timestamp': '2025-09-10 02:38:13.584813', 'step': 4169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:13.647749', 'step': 4169, 'epoch': 1} {'type': 'loss', 'content': 0.12332766503095627, 'timestamp': '2025-09-10 02:38:13.649820', 'step': 4170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:13.706839', 'step': 4170, 'epoch': 1} {'type': 'loss', 'content': 0.13043060898780823, 'timestamp': '2025-09-10 02:38:13.709208', 'step': 4171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:13.766013', 'step': 4171, 'epoch': 1} {'type': 'loss', 'content': 0.1361061930656433, 'timestamp': '2025-09-10 02:38:13.772846', 'step': 4172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:13.829365', 'step': 4172, 'epoch': 1} {'type': 'loss', 'content': 0.1696624457836151, 'timestamp': '2025-09-10 02:38:13.831360', 'step': 4173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:13.886710', 'step': 4173, 'epoch': 1} {'type': 'loss', 'content': 0.1132735162973404, 'timestamp': '2025-09-10 02:38:13.889071', 'step': 4174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:13.944666', 'step': 4174, 'epoch': 1} {'type': 'loss', 'content': 0.09219954907894135, 'timestamp': '2025-09-10 02:38:13.947009', 'step': 4175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:14.001528', 'step': 4175, 'epoch': 1} {'type': 'loss', 'content': 0.09615007042884827, 'timestamp': '2025-09-10 02:38:14.007760', 'step': 4176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:14.061163', 'step': 4176, 'epoch': 1} {'type': 'loss', 'content': 0.1725587248802185, 'timestamp': '2025-09-10 02:38:14.063214', 'step': 4177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:14.116631', 'step': 4177, 'epoch': 1} {'type': 'loss', 'content': 0.15226374566555023, 'timestamp': '2025-09-10 02:38:14.118885', 'step': 4178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:14.173186', 'step': 4178, 'epoch': 1} {'type': 'loss', 'content': 0.21943625807762146, 'timestamp': '2025-09-10 02:38:14.175318', 'step': 4179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:14.229786', 'step': 4179, 'epoch': 1} {'type': 'loss', 'content': 0.14650669693946838, 'timestamp': '2025-09-10 02:38:14.236274', 'step': 4180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:14.292287', 'step': 4180, 'epoch': 1} {'type': 'loss', 'content': 0.05212988331913948, 'timestamp': '2025-09-10 02:38:14.294537', 'step': 4181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:14.349323', 'step': 4181, 'epoch': 1} {'type': 'loss', 'content': 0.1452847272157669, 'timestamp': '2025-09-10 02:38:14.351516', 'step': 4182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:14.406595', 'step': 4182, 'epoch': 1} {'type': 'loss', 'content': 0.23894067108631134, 'timestamp': '2025-09-10 02:38:14.408633', 'step': 4183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:14.463410', 'step': 4183, 'epoch': 1} {'type': 'loss', 'content': 0.13105569779872894, 'timestamp': '2025-09-10 02:38:14.469692', 'step': 4184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:14.525141', 'step': 4184, 'epoch': 1} {'type': 'loss', 'content': 0.1443193256855011, 'timestamp': '2025-09-10 02:38:14.527352', 'step': 4185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:14.587264', 'step': 4185, 'epoch': 1} {'type': 'loss', 'content': 0.10905091464519501, 'timestamp': '2025-09-10 02:38:14.589476', 'step': 4186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:14.645409', 'step': 4186, 'epoch': 1} {'type': 'loss', 'content': 0.22670499980449677, 'timestamp': '2025-09-10 02:38:14.647606', 'step': 4187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:14.702574', 'step': 4187, 'epoch': 1} {'type': 'loss', 'content': 0.11562981456518173, 'timestamp': '2025-09-10 02:38:14.709097', 'step': 4188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:14.763749', 'step': 4188, 'epoch': 1} {'type': 'loss', 'content': 0.1288502812385559, 'timestamp': '2025-09-10 02:38:14.766423', 'step': 4189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:14.822471', 'step': 4189, 'epoch': 1} {'type': 'loss', 'content': 0.24735786020755768, 'timestamp': '2025-09-10 02:38:14.824732', 'step': 4190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:14.881032', 'step': 4190, 'epoch': 1} {'type': 'loss', 'content': 0.11301320791244507, 'timestamp': '2025-09-10 02:38:14.883086', 'step': 4191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:14.938091', 'step': 4191, 'epoch': 1} {'type': 'loss', 'content': 0.15676595270633698, 'timestamp': '2025-09-10 02:38:14.944605', 'step': 4192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:14.998885', 'step': 4192, 'epoch': 1} {'type': 'loss', 'content': 0.1631697118282318, 'timestamp': '2025-09-10 02:38:15.001109', 'step': 4193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:15.056424', 'step': 4193, 'epoch': 1} {'type': 'loss', 'content': 0.22147689759731293, 'timestamp': '2025-09-10 02:38:15.058629', 'step': 4194, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:38:27.971383', 'step': 4194, 'epoch': 1} {'type': 'pplx', 'content': 14178.318834626629, 'timestamp': '2025-09-10 02:38:27.974401', 'step': 4194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:28.028003', 'step': 4194, 'epoch': 1} {'type': 'loss', 'content': 0.18563002347946167, 'timestamp': '2025-09-10 02:38:28.030322', 'step': 4195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:28.087620', 'step': 4195, 'epoch': 1} {'type': 'loss', 'content': 0.12710930407047272, 'timestamp': '2025-09-10 02:38:28.093892', 'step': 4196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:28.148224', 'step': 4196, 'epoch': 1} {'type': 'loss', 'content': 0.13567210733890533, 'timestamp': '2025-09-10 02:38:28.151486', 'step': 4197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:28.208660', 'step': 4197, 'epoch': 1} {'type': 'loss', 'content': 0.1443861722946167, 'timestamp': '2025-09-10 02:38:28.210967', 'step': 4198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:28.267587', 'step': 4198, 'epoch': 1} {'type': 'loss', 'content': 0.13935378193855286, 'timestamp': '2025-09-10 02:38:28.271776', 'step': 4199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:28.327872', 'step': 4199, 'epoch': 1} {'type': 'loss', 'content': 0.17022038996219635, 'timestamp': '2025-09-10 02:38:28.334266', 'step': 4200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:28.386696', 'step': 4200, 'epoch': 1} {'type': 'loss', 'content': 0.11257786303758621, 'timestamp': '2025-09-10 02:38:28.388963', 'step': 4201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:28.443013', 'step': 4201, 'epoch': 1} {'type': 'loss', 'content': 0.22937262058258057, 'timestamp': '2025-09-10 02:38:28.445390', 'step': 4202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:28.499286', 'step': 4202, 'epoch': 1} {'type': 'loss', 'content': 0.213912233710289, 'timestamp': '2025-09-10 02:38:28.501697', 'step': 4203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:28.556523', 'step': 4203, 'epoch': 1} {'type': 'loss', 'content': 0.2680577039718628, 'timestamp': '2025-09-10 02:38:28.563055', 'step': 4204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:28.618915', 'step': 4204, 'epoch': 1} {'type': 'loss', 'content': 0.13397477567195892, 'timestamp': '2025-09-10 02:38:28.621892', 'step': 4205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:28.678986', 'step': 4205, 'epoch': 1} {'type': 'loss', 'content': 0.17607292532920837, 'timestamp': '2025-09-10 02:38:28.681448', 'step': 4206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:28.737698', 'step': 4206, 'epoch': 1} {'type': 'loss', 'content': 0.1489536166191101, 'timestamp': '2025-09-10 02:38:28.740145', 'step': 4207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:28.795031', 'step': 4207, 'epoch': 1} {'type': 'loss', 'content': 0.15413062274456024, 'timestamp': '2025-09-10 02:38:28.801456', 'step': 4208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:28.855149', 'step': 4208, 'epoch': 1} {'type': 'loss', 'content': 0.21099263429641724, 'timestamp': '2025-09-10 02:38:28.857308', 'step': 4209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:28.911279', 'step': 4209, 'epoch': 1} {'type': 'loss', 'content': 0.2000863254070282, 'timestamp': '2025-09-10 02:38:28.913710', 'step': 4210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:28.970919', 'step': 4210, 'epoch': 1} {'type': 'loss', 'content': 0.1293633133172989, 'timestamp': '2025-09-10 02:38:28.973813', 'step': 4211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:29.030883', 'step': 4211, 'epoch': 1} {'type': 'loss', 'content': 0.28614452481269836, 'timestamp': '2025-09-10 02:38:29.038789', 'step': 4212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:29.093904', 'step': 4212, 'epoch': 1} {'type': 'loss', 'content': 0.13933299481868744, 'timestamp': '2025-09-10 02:38:29.096430', 'step': 4213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:29.153299', 'step': 4213, 'epoch': 1} {'type': 'loss', 'content': 0.16827814280986786, 'timestamp': '2025-09-10 02:38:29.155725', 'step': 4214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:29.214095', 'step': 4214, 'epoch': 1} {'type': 'loss', 'content': 0.19106298685073853, 'timestamp': '2025-09-10 02:38:29.217795', 'step': 4215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:29.285083', 'step': 4215, 'epoch': 1} {'type': 'loss', 'content': 0.19412127137184143, 'timestamp': '2025-09-10 02:38:29.292092', 'step': 4216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:29.350215', 'step': 4216, 'epoch': 1} {'type': 'loss', 'content': 0.17685511708259583, 'timestamp': '2025-09-10 02:38:29.352646', 'step': 4217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:29.410564', 'step': 4217, 'epoch': 1} {'type': 'loss', 'content': 0.18958184123039246, 'timestamp': '2025-09-10 02:38:29.413053', 'step': 4218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:29.471661', 'step': 4218, 'epoch': 1} {'type': 'loss', 'content': 0.16505348682403564, 'timestamp': '2025-09-10 02:38:29.474327', 'step': 4219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:29.533154', 'step': 4219, 'epoch': 1} {'type': 'loss', 'content': 0.13061067461967468, 'timestamp': '2025-09-10 02:38:29.540548', 'step': 4220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:29.599154', 'step': 4220, 'epoch': 1} {'type': 'loss', 'content': 0.10334541648626328, 'timestamp': '2025-09-10 02:38:29.602508', 'step': 4221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:29.671120', 'step': 4221, 'epoch': 1} {'type': 'loss', 'content': 0.098843514919281, 'timestamp': '2025-09-10 02:38:29.673823', 'step': 4222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:29.732390', 'step': 4222, 'epoch': 1} {'type': 'loss', 'content': 0.19170665740966797, 'timestamp': '2025-09-10 02:38:29.735722', 'step': 4223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:29.794883', 'step': 4223, 'epoch': 1} {'type': 'loss', 'content': 0.195248082280159, 'timestamp': '2025-09-10 02:38:29.802328', 'step': 4224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:29.861059', 'step': 4224, 'epoch': 1} {'type': 'loss', 'content': 0.13385383784770966, 'timestamp': '2025-09-10 02:38:29.863600', 'step': 4225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:29.925195', 'step': 4225, 'epoch': 1} {'type': 'loss', 'content': 0.22760911285877228, 'timestamp': '2025-09-10 02:38:29.927545', 'step': 4226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:29.988921', 'step': 4226, 'epoch': 1} {'type': 'loss', 'content': 0.2729531228542328, 'timestamp': '2025-09-10 02:38:29.991383', 'step': 4227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:30.051975', 'step': 4227, 'epoch': 1} {'type': 'loss', 'content': 0.17590662837028503, 'timestamp': '2025-09-10 02:38:30.061603', 'step': 4228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:30.120571', 'step': 4228, 'epoch': 1} {'type': 'loss', 'content': 0.08461495488882065, 'timestamp': '2025-09-10 02:38:30.122952', 'step': 4229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:30.182628', 'step': 4229, 'epoch': 1} {'type': 'loss', 'content': 0.1388024389743805, 'timestamp': '2025-09-10 02:38:30.185188', 'step': 4230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:30.244660', 'step': 4230, 'epoch': 1} {'type': 'loss', 'content': 0.10925611108541489, 'timestamp': '2025-09-10 02:38:30.247140', 'step': 4231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:30.309944', 'step': 4231, 'epoch': 1} {'type': 'loss', 'content': 0.15749216079711914, 'timestamp': '2025-09-10 02:38:30.317613', 'step': 4232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:30.379309', 'step': 4232, 'epoch': 1} {'type': 'loss', 'content': 0.22855496406555176, 'timestamp': '2025-09-10 02:38:30.383945', 'step': 4233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:30.444208', 'step': 4233, 'epoch': 1} {'type': 'loss', 'content': 0.24695125222206116, 'timestamp': '2025-09-10 02:38:30.451395', 'step': 4234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:30.511789', 'step': 4234, 'epoch': 1} {'type': 'loss', 'content': 0.29325640201568604, 'timestamp': '2025-09-10 02:38:30.514191', 'step': 4235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:30.573250', 'step': 4235, 'epoch': 1} {'type': 'loss', 'content': 0.20214177668094635, 'timestamp': '2025-09-10 02:38:30.580548', 'step': 4236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:30.640165', 'step': 4236, 'epoch': 1} {'type': 'loss', 'content': 0.19392924010753632, 'timestamp': '2025-09-10 02:38:30.642493', 'step': 4237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:30.702149', 'step': 4237, 'epoch': 1} {'type': 'loss', 'content': 0.16489632427692413, 'timestamp': '2025-09-10 02:38:30.704725', 'step': 4238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:30.763744', 'step': 4238, 'epoch': 1} {'type': 'loss', 'content': 0.14471015334129333, 'timestamp': '2025-09-10 02:38:30.766221', 'step': 4239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:30.827428', 'step': 4239, 'epoch': 1} {'type': 'loss', 'content': 0.06576739996671677, 'timestamp': '2025-09-10 02:38:30.835074', 'step': 4240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:30.894245', 'step': 4240, 'epoch': 1} {'type': 'loss', 'content': 0.11546618491411209, 'timestamp': '2025-09-10 02:38:30.897003', 'step': 4241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:30.956750', 'step': 4241, 'epoch': 1} {'type': 'loss', 'content': 0.11460503935813904, 'timestamp': '2025-09-10 02:38:30.959247', 'step': 4242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:31.018327', 'step': 4242, 'epoch': 1} {'type': 'loss', 'content': 0.23023344576358795, 'timestamp': '2025-09-10 02:38:31.020626', 'step': 4243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:31.079146', 'step': 4243, 'epoch': 1} {'type': 'loss', 'content': 0.20663689076900482, 'timestamp': '2025-09-10 02:38:31.086240', 'step': 4244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:31.144108', 'step': 4244, 'epoch': 1} {'type': 'loss', 'content': 0.11803900450468063, 'timestamp': '2025-09-10 02:38:31.146562', 'step': 4245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:31.206395', 'step': 4245, 'epoch': 1} {'type': 'loss', 'content': 0.17095333337783813, 'timestamp': '2025-09-10 02:38:31.208846', 'step': 4246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:31.268141', 'step': 4246, 'epoch': 1} {'type': 'loss', 'content': 0.2836468517780304, 'timestamp': '2025-09-10 02:38:31.270820', 'step': 4247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:31.330244', 'step': 4247, 'epoch': 1} {'type': 'loss', 'content': 0.1750044971704483, 'timestamp': '2025-09-10 02:38:31.337570', 'step': 4248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:31.398469', 'step': 4248, 'epoch': 1} {'type': 'loss', 'content': 0.24672362208366394, 'timestamp': '2025-09-10 02:38:31.401293', 'step': 4249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:31.460625', 'step': 4249, 'epoch': 1} {'type': 'loss', 'content': 0.2719947397708893, 'timestamp': '2025-09-10 02:38:31.463133', 'step': 4250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:31.519730', 'step': 4250, 'epoch': 1} {'type': 'loss', 'content': 0.18952785432338715, 'timestamp': '2025-09-10 02:38:31.522104', 'step': 4251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:31.579253', 'step': 4251, 'epoch': 1} {'type': 'loss', 'content': 0.12517966330051422, 'timestamp': '2025-09-10 02:38:31.586247', 'step': 4252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:31.642318', 'step': 4252, 'epoch': 1} {'type': 'loss', 'content': 0.13312236964702606, 'timestamp': '2025-09-10 02:38:31.644726', 'step': 4253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:31.700505', 'step': 4253, 'epoch': 1} {'type': 'loss', 'content': 0.19340838491916656, 'timestamp': '2025-09-10 02:38:31.702874', 'step': 4254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:31.758246', 'step': 4254, 'epoch': 1} {'type': 'loss', 'content': 0.15362463891506195, 'timestamp': '2025-09-10 02:38:31.760591', 'step': 4255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:31.816183', 'step': 4255, 'epoch': 1} {'type': 'loss', 'content': 0.1226581409573555, 'timestamp': '2025-09-10 02:38:31.822938', 'step': 4256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:31.878247', 'step': 4256, 'epoch': 1} {'type': 'loss', 'content': 0.2230258584022522, 'timestamp': '2025-09-10 02:38:31.880772', 'step': 4257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:31.937179', 'step': 4257, 'epoch': 1} {'type': 'loss', 'content': 0.2810492217540741, 'timestamp': '2025-09-10 02:38:31.939473', 'step': 4258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:31.995231', 'step': 4258, 'epoch': 1} {'type': 'loss', 'content': 0.15380284190177917, 'timestamp': '2025-09-10 02:38:31.997557', 'step': 4259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:32.052614', 'step': 4259, 'epoch': 1} {'type': 'loss', 'content': 0.13164639472961426, 'timestamp': '2025-09-10 02:38:32.059223', 'step': 4260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:32.118545', 'step': 4260, 'epoch': 1} {'type': 'loss', 'content': 0.1903177797794342, 'timestamp': '2025-09-10 02:38:32.120716', 'step': 4261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:32.175616', 'step': 4261, 'epoch': 1} {'type': 'loss', 'content': 0.15686200559139252, 'timestamp': '2025-09-10 02:38:32.177889', 'step': 4262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:32.233070', 'step': 4262, 'epoch': 1} {'type': 'loss', 'content': 0.18675558269023895, 'timestamp': '2025-09-10 02:38:32.235380', 'step': 4263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:32.290123', 'step': 4263, 'epoch': 1} {'type': 'loss', 'content': 0.08596038818359375, 'timestamp': '2025-09-10 02:38:32.296327', 'step': 4264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:32.350347', 'step': 4264, 'epoch': 1} {'type': 'loss', 'content': 0.12985782325267792, 'timestamp': '2025-09-10 02:38:32.352591', 'step': 4265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:32.406755', 'step': 4265, 'epoch': 1} {'type': 'loss', 'content': 0.19942332804203033, 'timestamp': '2025-09-10 02:38:32.408980', 'step': 4266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:32.467222', 'step': 4266, 'epoch': 1} {'type': 'loss', 'content': 0.1576993763446808, 'timestamp': '2025-09-10 02:38:32.469612', 'step': 4267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:32.526405', 'step': 4267, 'epoch': 1} {'type': 'loss', 'content': 0.19971288740634918, 'timestamp': '2025-09-10 02:38:32.533010', 'step': 4268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:32.587427', 'step': 4268, 'epoch': 1} {'type': 'loss', 'content': 0.22029173374176025, 'timestamp': '2025-09-10 02:38:32.589386', 'step': 4269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:32.643897', 'step': 4269, 'epoch': 1} {'type': 'loss', 'content': 0.09125654399394989, 'timestamp': '2025-09-10 02:38:32.646324', 'step': 4270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:32.701142', 'step': 4270, 'epoch': 1} {'type': 'loss', 'content': 0.1652909517288208, 'timestamp': '2025-09-10 02:38:32.703429', 'step': 4271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:32.757357', 'step': 4271, 'epoch': 1} {'type': 'loss', 'content': 0.13140057027339935, 'timestamp': '2025-09-10 02:38:32.763715', 'step': 4272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:32.818157', 'step': 4272, 'epoch': 1} {'type': 'loss', 'content': 0.1743193119764328, 'timestamp': '2025-09-10 02:38:32.820436', 'step': 4273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:32.874574', 'step': 4273, 'epoch': 1} {'type': 'loss', 'content': 0.23463475704193115, 'timestamp': '2025-09-10 02:38:32.876924', 'step': 4274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:32.931643', 'step': 4274, 'epoch': 1} {'type': 'loss', 'content': 0.13946910202503204, 'timestamp': '2025-09-10 02:38:32.933705', 'step': 4275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:32.989275', 'step': 4275, 'epoch': 1} {'type': 'loss', 'content': 0.17100849747657776, 'timestamp': '2025-09-10 02:38:32.995688', 'step': 4276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:33.050404', 'step': 4276, 'epoch': 1} {'type': 'loss', 'content': 0.10498323291540146, 'timestamp': '2025-09-10 02:38:33.052644', 'step': 4277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:33.108080', 'step': 4277, 'epoch': 1} {'type': 'loss', 'content': 0.09546457976102829, 'timestamp': '2025-09-10 02:38:33.110226', 'step': 4278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:33.166058', 'step': 4278, 'epoch': 1} {'type': 'loss', 'content': 0.1681274026632309, 'timestamp': '2025-09-10 02:38:33.168293', 'step': 4279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:33.224540', 'step': 4279, 'epoch': 1} {'type': 'loss', 'content': 0.2246968001127243, 'timestamp': '2025-09-10 02:38:33.230964', 'step': 4280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:33.285704', 'step': 4280, 'epoch': 1} {'type': 'loss', 'content': 0.13424062728881836, 'timestamp': '2025-09-10 02:38:33.287757', 'step': 4281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:33.342699', 'step': 4281, 'epoch': 1} {'type': 'loss', 'content': 0.09506217390298843, 'timestamp': '2025-09-10 02:38:33.344695', 'step': 4282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:33.399958', 'step': 4282, 'epoch': 1} {'type': 'loss', 'content': 0.15499918162822723, 'timestamp': '2025-09-10 02:38:33.402229', 'step': 4283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:33.457892', 'step': 4283, 'epoch': 1} {'type': 'loss', 'content': 0.2127275913953781, 'timestamp': '2025-09-10 02:38:33.464667', 'step': 4284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:33.520247', 'step': 4284, 'epoch': 1} {'type': 'loss', 'content': 0.1692063808441162, 'timestamp': '2025-09-10 02:38:33.522697', 'step': 4285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:33.578380', 'step': 4285, 'epoch': 1} {'type': 'loss', 'content': 0.18592406809329987, 'timestamp': '2025-09-10 02:38:33.580871', 'step': 4286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:33.642513', 'step': 4286, 'epoch': 1} {'type': 'loss', 'content': 0.1872846484184265, 'timestamp': '2025-09-10 02:38:33.644828', 'step': 4287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:33.703300', 'step': 4287, 'epoch': 1} {'type': 'loss', 'content': 0.16867610812187195, 'timestamp': '2025-09-10 02:38:33.709988', 'step': 4288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:33.764905', 'step': 4288, 'epoch': 1} {'type': 'loss', 'content': 0.14752928912639618, 'timestamp': '2025-09-10 02:38:33.767272', 'step': 4289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:33.822438', 'step': 4289, 'epoch': 1} {'type': 'loss', 'content': 0.24511770904064178, 'timestamp': '2025-09-10 02:38:33.824795', 'step': 4290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:33.880004', 'step': 4290, 'epoch': 1} {'type': 'loss', 'content': 0.13007856905460358, 'timestamp': '2025-09-10 02:38:33.882361', 'step': 4291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:33.937667', 'step': 4291, 'epoch': 1} {'type': 'loss', 'content': 0.15470045804977417, 'timestamp': '2025-09-10 02:38:33.945370', 'step': 4292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:33.999715', 'step': 4292, 'epoch': 1} {'type': 'loss', 'content': 0.30031880736351013, 'timestamp': '2025-09-10 02:38:34.002100', 'step': 4293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:34.057650', 'step': 4293, 'epoch': 1} {'type': 'loss', 'content': 0.10853501409292221, 'timestamp': '2025-09-10 02:38:34.060077', 'step': 4294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:34.116244', 'step': 4294, 'epoch': 1} {'type': 'loss', 'content': 0.12869250774383545, 'timestamp': '2025-09-10 02:38:34.118586', 'step': 4295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:34.172749', 'step': 4295, 'epoch': 1} {'type': 'loss', 'content': 0.15804585814476013, 'timestamp': '2025-09-10 02:38:34.179258', 'step': 4296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:34.233534', 'step': 4296, 'epoch': 1} {'type': 'loss', 'content': 0.19977903366088867, 'timestamp': '2025-09-10 02:38:34.235569', 'step': 4297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:34.290139', 'step': 4297, 'epoch': 1} {'type': 'loss', 'content': 0.13248895108699799, 'timestamp': '2025-09-10 02:38:34.292506', 'step': 4298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:34.347828', 'step': 4298, 'epoch': 1} {'type': 'loss', 'content': 0.2582106292247772, 'timestamp': '2025-09-10 02:38:34.350325', 'step': 4299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:34.405152', 'step': 4299, 'epoch': 1} {'type': 'loss', 'content': 0.18719948828220367, 'timestamp': '2025-09-10 02:38:34.411687', 'step': 4300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:34.466186', 'step': 4300, 'epoch': 1} {'type': 'loss', 'content': 0.2078636735677719, 'timestamp': '2025-09-10 02:38:34.468497', 'step': 4301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:34.522221', 'step': 4301, 'epoch': 1} {'type': 'loss', 'content': 0.11595295369625092, 'timestamp': '2025-09-10 02:38:34.524417', 'step': 4302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:34.579070', 'step': 4302, 'epoch': 1} {'type': 'loss', 'content': 0.1731865406036377, 'timestamp': '2025-09-10 02:38:34.581197', 'step': 4303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:34.636393', 'step': 4303, 'epoch': 1} {'type': 'loss', 'content': 0.13994640111923218, 'timestamp': '2025-09-10 02:38:34.642795', 'step': 4304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:34.697060', 'step': 4304, 'epoch': 1} {'type': 'loss', 'content': 0.17265759408473969, 'timestamp': '2025-09-10 02:38:34.699324', 'step': 4305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:34.754236', 'step': 4305, 'epoch': 1} {'type': 'loss', 'content': 0.10376984626054764, 'timestamp': '2025-09-10 02:38:34.756384', 'step': 4306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:34.811677', 'step': 4306, 'epoch': 1} {'type': 'loss', 'content': 0.12500280141830444, 'timestamp': '2025-09-10 02:38:34.814084', 'step': 4307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:34.869465', 'step': 4307, 'epoch': 1} {'type': 'loss', 'content': 0.1444704830646515, 'timestamp': '2025-09-10 02:38:34.875991', 'step': 4308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:34.930907', 'step': 4308, 'epoch': 1} {'type': 'loss', 'content': 0.16416236758232117, 'timestamp': '2025-09-10 02:38:34.932969', 'step': 4309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:34.988935', 'step': 4309, 'epoch': 1} {'type': 'loss', 'content': 0.20722626149654388, 'timestamp': '2025-09-10 02:38:34.991416', 'step': 4310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:35.048801', 'step': 4310, 'epoch': 1} {'type': 'loss', 'content': 0.07602324336767197, 'timestamp': '2025-09-10 02:38:35.050984', 'step': 4311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:35.108929', 'step': 4311, 'epoch': 1} {'type': 'loss', 'content': 0.17308920621871948, 'timestamp': '2025-09-10 02:38:35.115828', 'step': 4312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:35.171934', 'step': 4312, 'epoch': 1} {'type': 'loss', 'content': 0.13504992425441742, 'timestamp': '2025-09-10 02:38:35.174375', 'step': 4313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:35.229652', 'step': 4313, 'epoch': 1} {'type': 'loss', 'content': 0.20738466084003448, 'timestamp': '2025-09-10 02:38:35.231976', 'step': 4314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:35.288423', 'step': 4314, 'epoch': 1} {'type': 'loss', 'content': 0.2142115980386734, 'timestamp': '2025-09-10 02:38:35.290832', 'step': 4315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:35.347366', 'step': 4315, 'epoch': 1} {'type': 'loss', 'content': 0.12526853382587433, 'timestamp': '2025-09-10 02:38:35.353994', 'step': 4316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:35.409218', 'step': 4316, 'epoch': 1} {'type': 'loss', 'content': 0.29577410221099854, 'timestamp': '2025-09-10 02:38:35.411538', 'step': 4317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:35.466011', 'step': 4317, 'epoch': 1} {'type': 'loss', 'content': 0.13274329900741577, 'timestamp': '2025-09-10 02:38:35.468476', 'step': 4318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:35.523483', 'step': 4318, 'epoch': 1} {'type': 'loss', 'content': 0.16703695058822632, 'timestamp': '2025-09-10 02:38:35.525751', 'step': 4319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:35.581013', 'step': 4319, 'epoch': 1} {'type': 'loss', 'content': 0.16715611517429352, 'timestamp': '2025-09-10 02:38:35.587452', 'step': 4320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:35.642541', 'step': 4320, 'epoch': 1} {'type': 'loss', 'content': 0.13005581498146057, 'timestamp': '2025-09-10 02:38:35.644660', 'step': 4321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:35.700333', 'step': 4321, 'epoch': 1} {'type': 'loss', 'content': 0.17882020771503448, 'timestamp': '2025-09-10 02:38:35.702637', 'step': 4322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:35.759969', 'step': 4322, 'epoch': 1} {'type': 'loss', 'content': 0.21390309929847717, 'timestamp': '2025-09-10 02:38:35.762217', 'step': 4323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:35.817378', 'step': 4323, 'epoch': 1} {'type': 'loss', 'content': 0.18746495246887207, 'timestamp': '2025-09-10 02:38:35.823831', 'step': 4324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:35.878084', 'step': 4324, 'epoch': 1} {'type': 'loss', 'content': 0.13396002352237701, 'timestamp': '2025-09-10 02:38:35.880110', 'step': 4325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:35.935716', 'step': 4325, 'epoch': 1} {'type': 'loss', 'content': 0.15524278581142426, 'timestamp': '2025-09-10 02:38:35.937818', 'step': 4326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:35.992972', 'step': 4326, 'epoch': 1} {'type': 'loss', 'content': 0.29824405908584595, 'timestamp': '2025-09-10 02:38:35.995469', 'step': 4327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:36.051312', 'step': 4327, 'epoch': 1} {'type': 'loss', 'content': 0.16329964995384216, 'timestamp': '2025-09-10 02:38:36.058025', 'step': 4328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:36.113149', 'step': 4328, 'epoch': 1} {'type': 'loss', 'content': 0.15979796648025513, 'timestamp': '2025-09-10 02:38:36.115562', 'step': 4329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:36.171509', 'step': 4329, 'epoch': 1} {'type': 'loss', 'content': 0.21247820556163788, 'timestamp': '2025-09-10 02:38:36.173792', 'step': 4330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:36.232317', 'step': 4330, 'epoch': 1} {'type': 'loss', 'content': 0.19565552473068237, 'timestamp': '2025-09-10 02:38:36.234498', 'step': 4331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:36.290765', 'step': 4331, 'epoch': 1} {'type': 'loss', 'content': 0.09449151158332825, 'timestamp': '2025-09-10 02:38:36.297581', 'step': 4332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:36.354483', 'step': 4332, 'epoch': 1} {'type': 'loss', 'content': 0.12488608062267303, 'timestamp': '2025-09-10 02:38:36.356687', 'step': 4333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:36.413558', 'step': 4333, 'epoch': 1} {'type': 'loss', 'content': 0.0944771096110344, 'timestamp': '2025-09-10 02:38:36.415876', 'step': 4334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:36.471994', 'step': 4334, 'epoch': 1} {'type': 'loss', 'content': 0.11275958269834518, 'timestamp': '2025-09-10 02:38:36.474297', 'step': 4335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:36.529190', 'step': 4335, 'epoch': 1} {'type': 'loss', 'content': 0.18198931217193604, 'timestamp': '2025-09-10 02:38:36.535705', 'step': 4336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:36.589529', 'step': 4336, 'epoch': 1} {'type': 'loss', 'content': 0.13793793320655823, 'timestamp': '2025-09-10 02:38:36.591883', 'step': 4337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:36.650813', 'step': 4337, 'epoch': 1} {'type': 'loss', 'content': 0.2331835776567459, 'timestamp': '2025-09-10 02:38:36.653032', 'step': 4338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:36.708704', 'step': 4338, 'epoch': 1} {'type': 'loss', 'content': 0.186198890209198, 'timestamp': '2025-09-10 02:38:36.711147', 'step': 4339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:36.767606', 'step': 4339, 'epoch': 1} {'type': 'loss', 'content': 0.12595199048519135, 'timestamp': '2025-09-10 02:38:36.774177', 'step': 4340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:36.829284', 'step': 4340, 'epoch': 1} {'type': 'loss', 'content': 0.1468924582004547, 'timestamp': '2025-09-10 02:38:36.832008', 'step': 4341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:36.887185', 'step': 4341, 'epoch': 1} {'type': 'loss', 'content': 0.16648519039154053, 'timestamp': '2025-09-10 02:38:36.889576', 'step': 4342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:36.944953', 'step': 4342, 'epoch': 1} {'type': 'loss', 'content': 0.18439573049545288, 'timestamp': '2025-09-10 02:38:36.947316', 'step': 4343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:37.003359', 'step': 4343, 'epoch': 1} {'type': 'loss', 'content': 0.186371847987175, 'timestamp': '2025-09-10 02:38:37.009626', 'step': 4344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:37.063777', 'step': 4344, 'epoch': 1} {'type': 'loss', 'content': 0.10026806592941284, 'timestamp': '2025-09-10 02:38:37.066249', 'step': 4345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:37.122128', 'step': 4345, 'epoch': 1} {'type': 'loss', 'content': 0.1352115273475647, 'timestamp': '2025-09-10 02:38:37.124436', 'step': 4346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:37.180087', 'step': 4346, 'epoch': 1} {'type': 'loss', 'content': 0.21036545932292938, 'timestamp': '2025-09-10 02:38:37.182475', 'step': 4347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:37.238739', 'step': 4347, 'epoch': 1} {'type': 'loss', 'content': 0.21524213254451752, 'timestamp': '2025-09-10 02:38:37.245498', 'step': 4348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:37.301031', 'step': 4348, 'epoch': 1} {'type': 'loss', 'content': 0.16149064898490906, 'timestamp': '2025-09-10 02:38:37.303333', 'step': 4349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:37.359518', 'step': 4349, 'epoch': 1} {'type': 'loss', 'content': 0.219710111618042, 'timestamp': '2025-09-10 02:38:37.361899', 'step': 4350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:37.418433', 'step': 4350, 'epoch': 1} {'type': 'loss', 'content': 0.13015465438365936, 'timestamp': '2025-09-10 02:38:37.420922', 'step': 4351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:37.476828', 'step': 4351, 'epoch': 1} {'type': 'loss', 'content': 0.20166750252246857, 'timestamp': '2025-09-10 02:38:37.483549', 'step': 4352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:37.538706', 'step': 4352, 'epoch': 1} {'type': 'loss', 'content': 0.12681154906749725, 'timestamp': '2025-09-10 02:38:37.540813', 'step': 4353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:37.595789', 'step': 4353, 'epoch': 1} {'type': 'loss', 'content': 0.20238608121871948, 'timestamp': '2025-09-10 02:38:37.597931', 'step': 4354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:37.652493', 'step': 4354, 'epoch': 1} {'type': 'loss', 'content': 0.21506313979625702, 'timestamp': '2025-09-10 02:38:37.654864', 'step': 4355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:37.709311', 'step': 4355, 'epoch': 1} {'type': 'loss', 'content': 0.07269290089607239, 'timestamp': '2025-09-10 02:38:37.717130', 'step': 4356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:37.772212', 'step': 4356, 'epoch': 1} {'type': 'loss', 'content': 0.24107132852077484, 'timestamp': '2025-09-10 02:38:37.774816', 'step': 4357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:37.831429', 'step': 4357, 'epoch': 1} {'type': 'loss', 'content': 0.1909540891647339, 'timestamp': '2025-09-10 02:38:37.834018', 'step': 4358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:37.889960', 'step': 4358, 'epoch': 1} {'type': 'loss', 'content': 0.22466449439525604, 'timestamp': '2025-09-10 02:38:37.892214', 'step': 4359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:37.948277', 'step': 4359, 'epoch': 1} {'type': 'loss', 'content': 0.052321791648864746, 'timestamp': '2025-09-10 02:38:37.954790', 'step': 4360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:38.009423', 'step': 4360, 'epoch': 1} {'type': 'loss', 'content': 0.1325509399175644, 'timestamp': '2025-09-10 02:38:38.011491', 'step': 4361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:38.067598', 'step': 4361, 'epoch': 1} {'type': 'loss', 'content': 0.24739213287830353, 'timestamp': '2025-09-10 02:38:38.070035', 'step': 4362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:38.126724', 'step': 4362, 'epoch': 1} {'type': 'loss', 'content': 0.11955104768276215, 'timestamp': '2025-09-10 02:38:38.129045', 'step': 4363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:38.185778', 'step': 4363, 'epoch': 1} {'type': 'loss', 'content': 0.16991791129112244, 'timestamp': '2025-09-10 02:38:38.192584', 'step': 4364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:38.248246', 'step': 4364, 'epoch': 1} {'type': 'loss', 'content': 0.15176922082901, 'timestamp': '2025-09-10 02:38:38.250745', 'step': 4365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:38.306532', 'step': 4365, 'epoch': 1} {'type': 'loss', 'content': 0.16786986589431763, 'timestamp': '2025-09-10 02:38:38.309295', 'step': 4366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:38.368029', 'step': 4366, 'epoch': 1} {'type': 'loss', 'content': 0.1724328249692917, 'timestamp': '2025-09-10 02:38:38.370511', 'step': 4367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:38.428679', 'step': 4367, 'epoch': 1} {'type': 'loss', 'content': 0.3016659617424011, 'timestamp': '2025-09-10 02:38:38.435293', 'step': 4368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:38.492148', 'step': 4368, 'epoch': 1} {'type': 'loss', 'content': 0.14291134476661682, 'timestamp': '2025-09-10 02:38:38.494444', 'step': 4369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:38.560446', 'step': 4369, 'epoch': 1} {'type': 'loss', 'content': 0.3145480155944824, 'timestamp': '2025-09-10 02:38:38.562624', 'step': 4370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:38.618121', 'step': 4370, 'epoch': 1} {'type': 'loss', 'content': 0.18359418213367462, 'timestamp': '2025-09-10 02:38:38.620625', 'step': 4371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:38.677442', 'step': 4371, 'epoch': 1} {'type': 'loss', 'content': 0.23462672531604767, 'timestamp': '2025-09-10 02:38:38.684184', 'step': 4372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:38.738949', 'step': 4372, 'epoch': 1} {'type': 'loss', 'content': 0.15446826815605164, 'timestamp': '2025-09-10 02:38:38.741067', 'step': 4373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:38.797434', 'step': 4373, 'epoch': 1} {'type': 'loss', 'content': 0.2504463791847229, 'timestamp': '2025-09-10 02:38:38.799769', 'step': 4374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:38.855400', 'step': 4374, 'epoch': 1} {'type': 'loss', 'content': 0.2727509140968323, 'timestamp': '2025-09-10 02:38:38.857497', 'step': 4375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:38.913207', 'step': 4375, 'epoch': 1} {'type': 'loss', 'content': 0.17506590485572815, 'timestamp': '2025-09-10 02:38:38.919715', 'step': 4376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:38.974426', 'step': 4376, 'epoch': 1} {'type': 'loss', 'content': 0.09811548888683319, 'timestamp': '2025-09-10 02:38:38.976513', 'step': 4377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:39.032894', 'step': 4377, 'epoch': 1} {'type': 'loss', 'content': 0.12169614434242249, 'timestamp': '2025-09-10 02:38:39.034948', 'step': 4378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:39.090308', 'step': 4378, 'epoch': 1} {'type': 'loss', 'content': 0.199014350771904, 'timestamp': '2025-09-10 02:38:39.092588', 'step': 4379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:39.147772', 'step': 4379, 'epoch': 1} {'type': 'loss', 'content': 0.13078656792640686, 'timestamp': '2025-09-10 02:38:39.154265', 'step': 4380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:39.208748', 'step': 4380, 'epoch': 1} {'type': 'loss', 'content': 0.18817003071308136, 'timestamp': '2025-09-10 02:38:39.211041', 'step': 4381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:39.272537', 'step': 4381, 'epoch': 1} {'type': 'loss', 'content': 0.2681583762168884, 'timestamp': '2025-09-10 02:38:39.274698', 'step': 4382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:39.334744', 'step': 4382, 'epoch': 1} {'type': 'loss', 'content': 0.13924555480480194, 'timestamp': '2025-09-10 02:38:39.336927', 'step': 4383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:39.393460', 'step': 4383, 'epoch': 1} {'type': 'loss', 'content': 0.12254363298416138, 'timestamp': '2025-09-10 02:38:39.400202', 'step': 4384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:39.456295', 'step': 4384, 'epoch': 1} {'type': 'loss', 'content': 0.18570634722709656, 'timestamp': '2025-09-10 02:38:39.459004', 'step': 4385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:39.521229', 'step': 4385, 'epoch': 1} {'type': 'loss', 'content': 0.21241486072540283, 'timestamp': '2025-09-10 02:38:39.523803', 'step': 4386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:39.582363', 'step': 4386, 'epoch': 1} {'type': 'loss', 'content': 0.19815272092819214, 'timestamp': '2025-09-10 02:38:39.584848', 'step': 4387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:39.640804', 'step': 4387, 'epoch': 1} {'type': 'loss', 'content': 0.10912002623081207, 'timestamp': '2025-09-10 02:38:39.647532', 'step': 4388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:39.702429', 'step': 4388, 'epoch': 1} {'type': 'loss', 'content': 0.16278234124183655, 'timestamp': '2025-09-10 02:38:39.704769', 'step': 4389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:39.760574', 'step': 4389, 'epoch': 1} {'type': 'loss', 'content': 0.1611507683992386, 'timestamp': '2025-09-10 02:38:39.762856', 'step': 4390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:39.819977', 'step': 4390, 'epoch': 1} {'type': 'loss', 'content': 0.14407317340373993, 'timestamp': '2025-09-10 02:38:39.822406', 'step': 4391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:39.881171', 'step': 4391, 'epoch': 1} {'type': 'loss', 'content': 0.18286052346229553, 'timestamp': '2025-09-10 02:38:39.887906', 'step': 4392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:39.945735', 'step': 4392, 'epoch': 1} {'type': 'loss', 'content': 0.1314305067062378, 'timestamp': '2025-09-10 02:38:39.949082', 'step': 4393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:40.006167', 'step': 4393, 'epoch': 1} {'type': 'loss', 'content': 0.16917945444583893, 'timestamp': '2025-09-10 02:38:40.008461', 'step': 4394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:40.064582', 'step': 4394, 'epoch': 1} {'type': 'loss', 'content': 0.1520203799009323, 'timestamp': '2025-09-10 02:38:40.066987', 'step': 4395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:40.123276', 'step': 4395, 'epoch': 1} {'type': 'loss', 'content': 0.22797496616840363, 'timestamp': '2025-09-10 02:38:40.130028', 'step': 4396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:40.184627', 'step': 4396, 'epoch': 1} {'type': 'loss', 'content': 0.1573871374130249, 'timestamp': '2025-09-10 02:38:40.186967', 'step': 4397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:40.242427', 'step': 4397, 'epoch': 1} {'type': 'loss', 'content': 0.1634473204612732, 'timestamp': '2025-09-10 02:38:40.244515', 'step': 4398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:40.300484', 'step': 4398, 'epoch': 1} {'type': 'loss', 'content': 0.1339777708053589, 'timestamp': '2025-09-10 02:38:40.302800', 'step': 4399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:40.358481', 'step': 4399, 'epoch': 1} {'type': 'loss', 'content': 0.1746656745672226, 'timestamp': '2025-09-10 02:38:40.365047', 'step': 4400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:40.419988', 'step': 4400, 'epoch': 1} {'type': 'loss', 'content': 0.12571626901626587, 'timestamp': '2025-09-10 02:38:40.422364', 'step': 4401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:40.478327', 'step': 4401, 'epoch': 1} {'type': 'loss', 'content': 0.2554924190044403, 'timestamp': '2025-09-10 02:38:40.480417', 'step': 4402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:40.536449', 'step': 4402, 'epoch': 1} {'type': 'loss', 'content': 0.24288740754127502, 'timestamp': '2025-09-10 02:38:40.539153', 'step': 4403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:40.594459', 'step': 4403, 'epoch': 1} {'type': 'loss', 'content': 0.20114447176456451, 'timestamp': '2025-09-10 02:38:40.600820', 'step': 4404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:40.655476', 'step': 4404, 'epoch': 1} {'type': 'loss', 'content': 0.0875491127371788, 'timestamp': '2025-09-10 02:38:40.657851', 'step': 4405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:40.712435', 'step': 4405, 'epoch': 1} {'type': 'loss', 'content': 0.16341166198253632, 'timestamp': '2025-09-10 02:38:40.714800', 'step': 4406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:40.769617', 'step': 4406, 'epoch': 1} {'type': 'loss', 'content': 0.10362628102302551, 'timestamp': '2025-09-10 02:38:40.771938', 'step': 4407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:40.827357', 'step': 4407, 'epoch': 1} {'type': 'loss', 'content': 0.15716631710529327, 'timestamp': '2025-09-10 02:38:40.833864', 'step': 4408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:40.888994', 'step': 4408, 'epoch': 1} {'type': 'loss', 'content': 0.12582264840602875, 'timestamp': '2025-09-10 02:38:40.891318', 'step': 4409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:40.945425', 'step': 4409, 'epoch': 1} {'type': 'loss', 'content': 0.17943981289863586, 'timestamp': '2025-09-10 02:38:40.947606', 'step': 4410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:41.003465', 'step': 4410, 'epoch': 1} {'type': 'loss', 'content': 0.21186959743499756, 'timestamp': '2025-09-10 02:38:41.005700', 'step': 4411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:41.061516', 'step': 4411, 'epoch': 1} {'type': 'loss', 'content': 0.14789842069149017, 'timestamp': '2025-09-10 02:38:41.068149', 'step': 4412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:41.123060', 'step': 4412, 'epoch': 1} {'type': 'loss', 'content': 0.1555834710597992, 'timestamp': '2025-09-10 02:38:41.125154', 'step': 4413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:41.181264', 'step': 4413, 'epoch': 1} {'type': 'loss', 'content': 0.22133474051952362, 'timestamp': '2025-09-10 02:38:41.183305', 'step': 4414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:41.239284', 'step': 4414, 'epoch': 1} {'type': 'loss', 'content': 0.08758264780044556, 'timestamp': '2025-09-10 02:38:41.241594', 'step': 4415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:41.297947', 'step': 4415, 'epoch': 1} {'type': 'loss', 'content': 0.17781762778759003, 'timestamp': '2025-09-10 02:38:41.304507', 'step': 4416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:41.367961', 'step': 4416, 'epoch': 1} {'type': 'loss', 'content': 0.13277998566627502, 'timestamp': '2025-09-10 02:38:41.372854', 'step': 4417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:41.428847', 'step': 4417, 'epoch': 1} {'type': 'loss', 'content': 0.17079806327819824, 'timestamp': '2025-09-10 02:38:41.434281', 'step': 4418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:41.491854', 'step': 4418, 'epoch': 1} {'type': 'loss', 'content': 0.15931059420108795, 'timestamp': '2025-09-10 02:38:41.494154', 'step': 4419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:41.555882', 'step': 4419, 'epoch': 1} {'type': 'loss', 'content': 0.19848410785198212, 'timestamp': '2025-09-10 02:38:41.562673', 'step': 4420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:41.619193', 'step': 4420, 'epoch': 1} {'type': 'loss', 'content': 0.14113005995750427, 'timestamp': '2025-09-10 02:38:41.622322', 'step': 4421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:41.678285', 'step': 4421, 'epoch': 1} {'type': 'loss', 'content': 0.2237405925989151, 'timestamp': '2025-09-10 02:38:41.681044', 'step': 4422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:41.738450', 'step': 4422, 'epoch': 1} {'type': 'loss', 'content': 0.214356929063797, 'timestamp': '2025-09-10 02:38:41.740680', 'step': 4423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:41.795822', 'step': 4423, 'epoch': 1} {'type': 'loss', 'content': 0.14091360569000244, 'timestamp': '2025-09-10 02:38:41.802344', 'step': 4424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:41.862795', 'step': 4424, 'epoch': 1} {'type': 'loss', 'content': 0.19953812658786774, 'timestamp': '2025-09-10 02:38:41.865277', 'step': 4425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:41.922626', 'step': 4425, 'epoch': 1} {'type': 'loss', 'content': 0.2046259641647339, 'timestamp': '2025-09-10 02:38:41.924948', 'step': 4426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:41.980085', 'step': 4426, 'epoch': 1} {'type': 'loss', 'content': 0.23746082186698914, 'timestamp': '2025-09-10 02:38:41.982200', 'step': 4427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:42.038933', 'step': 4427, 'epoch': 1} {'type': 'loss', 'content': 0.09983903169631958, 'timestamp': '2025-09-10 02:38:42.045331', 'step': 4428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:42.099888', 'step': 4428, 'epoch': 1} {'type': 'loss', 'content': 0.20865754783153534, 'timestamp': '2025-09-10 02:38:42.102159', 'step': 4429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:42.157719', 'step': 4429, 'epoch': 1} {'type': 'loss', 'content': 0.1316593438386917, 'timestamp': '2025-09-10 02:38:42.161347', 'step': 4430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:42.218257', 'step': 4430, 'epoch': 1} {'type': 'loss', 'content': 0.16528800129890442, 'timestamp': '2025-09-10 02:38:42.220572', 'step': 4431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:42.275445', 'step': 4431, 'epoch': 1} {'type': 'loss', 'content': 0.23544630408287048, 'timestamp': '2025-09-10 02:38:42.282745', 'step': 4432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:42.344763', 'step': 4432, 'epoch': 1} {'type': 'loss', 'content': 0.19679543375968933, 'timestamp': '2025-09-10 02:38:42.346768', 'step': 4433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:42.401339', 'step': 4433, 'epoch': 1} {'type': 'loss', 'content': 0.19104130566120148, 'timestamp': '2025-09-10 02:38:42.403689', 'step': 4434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:42.459857', 'step': 4434, 'epoch': 1} {'type': 'loss', 'content': 0.26129859685897827, 'timestamp': '2025-09-10 02:38:42.463603', 'step': 4435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:42.518596', 'step': 4435, 'epoch': 1} {'type': 'loss', 'content': 0.08493972569704056, 'timestamp': '2025-09-10 02:38:42.525072', 'step': 4436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:42.589326', 'step': 4436, 'epoch': 1} {'type': 'loss', 'content': 0.23689709603786469, 'timestamp': '2025-09-10 02:38:42.591773', 'step': 4437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:42.649955', 'step': 4437, 'epoch': 1} {'type': 'loss', 'content': 0.12708687782287598, 'timestamp': '2025-09-10 02:38:42.652292', 'step': 4438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:42.707613', 'step': 4438, 'epoch': 1} {'type': 'loss', 'content': 0.15923932194709778, 'timestamp': '2025-09-10 02:38:42.710085', 'step': 4439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:42.765081', 'step': 4439, 'epoch': 1} {'type': 'loss', 'content': 0.14552640914916992, 'timestamp': '2025-09-10 02:38:42.771579', 'step': 4440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:42.829342', 'step': 4440, 'epoch': 1} {'type': 'loss', 'content': 0.37195050716400146, 'timestamp': '2025-09-10 02:38:42.832796', 'step': 4441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:42.891252', 'step': 4441, 'epoch': 1} {'type': 'loss', 'content': 0.16153019666671753, 'timestamp': '2025-09-10 02:38:42.893589', 'step': 4442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:42.950108', 'step': 4442, 'epoch': 1} {'type': 'loss', 'content': 0.18875427544116974, 'timestamp': '2025-09-10 02:38:42.956967', 'step': 4443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:43.012276', 'step': 4443, 'epoch': 1} {'type': 'loss', 'content': 0.22320936620235443, 'timestamp': '2025-09-10 02:38:43.018964', 'step': 4444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:43.072590', 'step': 4444, 'epoch': 1} {'type': 'loss', 'content': 0.16439959406852722, 'timestamp': '2025-09-10 02:38:43.074901', 'step': 4445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:43.130062', 'step': 4445, 'epoch': 1} {'type': 'loss', 'content': 0.13258981704711914, 'timestamp': '2025-09-10 02:38:43.132275', 'step': 4446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:43.191580', 'step': 4446, 'epoch': 1} {'type': 'loss', 'content': 0.09020564705133438, 'timestamp': '2025-09-10 02:38:43.193812', 'step': 4447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:43.249140', 'step': 4447, 'epoch': 1} {'type': 'loss', 'content': 0.21259285509586334, 'timestamp': '2025-09-10 02:38:43.255546', 'step': 4448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:43.310140', 'step': 4448, 'epoch': 1} {'type': 'loss', 'content': 0.11455088108778, 'timestamp': '2025-09-10 02:38:43.312485', 'step': 4449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:43.369733', 'step': 4449, 'epoch': 1} {'type': 'loss', 'content': 0.28179702162742615, 'timestamp': '2025-09-10 02:38:43.372073', 'step': 4450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:43.426296', 'step': 4450, 'epoch': 1} {'type': 'loss', 'content': 0.20794107019901276, 'timestamp': '2025-09-10 02:38:43.428564', 'step': 4451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:43.482582', 'step': 4451, 'epoch': 1} {'type': 'loss', 'content': 0.1444723755121231, 'timestamp': '2025-09-10 02:38:43.488716', 'step': 4452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:43.543015', 'step': 4452, 'epoch': 1} {'type': 'loss', 'content': 0.14710240066051483, 'timestamp': '2025-09-10 02:38:43.545627', 'step': 4453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:43.599407', 'step': 4453, 'epoch': 1} {'type': 'loss', 'content': 0.14653931558132172, 'timestamp': '2025-09-10 02:38:43.601434', 'step': 4454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:43.655386', 'step': 4454, 'epoch': 1} {'type': 'loss', 'content': 0.27035123109817505, 'timestamp': '2025-09-10 02:38:43.657869', 'step': 4455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:43.713376', 'step': 4455, 'epoch': 1} {'type': 'loss', 'content': 0.1249530240893364, 'timestamp': '2025-09-10 02:38:43.719923', 'step': 4456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:43.774106', 'step': 4456, 'epoch': 1} {'type': 'loss', 'content': 0.08867917954921722, 'timestamp': '2025-09-10 02:38:43.776428', 'step': 4457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:43.832083', 'step': 4457, 'epoch': 1} {'type': 'loss', 'content': 0.17577388882637024, 'timestamp': '2025-09-10 02:38:43.834470', 'step': 4458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:43.890133', 'step': 4458, 'epoch': 1} {'type': 'loss', 'content': 0.09125611931085587, 'timestamp': '2025-09-10 02:38:43.892626', 'step': 4459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:43.949085', 'step': 4459, 'epoch': 1} {'type': 'loss', 'content': 0.18825778365135193, 'timestamp': '2025-09-10 02:38:43.955701', 'step': 4460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:44.010505', 'step': 4460, 'epoch': 1} {'type': 'loss', 'content': 0.16380348801612854, 'timestamp': '2025-09-10 02:38:44.012782', 'step': 4461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:44.067011', 'step': 4461, 'epoch': 1} {'type': 'loss', 'content': 0.08821273595094681, 'timestamp': '2025-09-10 02:38:44.069397', 'step': 4462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:44.124278', 'step': 4462, 'epoch': 1} {'type': 'loss', 'content': 0.17817598581314087, 'timestamp': '2025-09-10 02:38:44.126707', 'step': 4463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:44.180870', 'step': 4463, 'epoch': 1} {'type': 'loss', 'content': 0.22167246043682098, 'timestamp': '2025-09-10 02:38:44.187238', 'step': 4464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:44.241199', 'step': 4464, 'epoch': 1} {'type': 'loss', 'content': 0.15013298392295837, 'timestamp': '2025-09-10 02:38:44.243482', 'step': 4465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:44.298788', 'step': 4465, 'epoch': 1} {'type': 'loss', 'content': 0.1996585875749588, 'timestamp': '2025-09-10 02:38:44.301065', 'step': 4466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:44.355736', 'step': 4466, 'epoch': 1} {'type': 'loss', 'content': 0.175369992852211, 'timestamp': '2025-09-10 02:38:44.357963', 'step': 4467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:44.416650', 'step': 4467, 'epoch': 1} {'type': 'loss', 'content': 0.1494874507188797, 'timestamp': '2025-09-10 02:38:44.423297', 'step': 4468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:44.478010', 'step': 4468, 'epoch': 1} {'type': 'loss', 'content': 0.14944571256637573, 'timestamp': '2025-09-10 02:38:44.480270', 'step': 4469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:44.538902', 'step': 4469, 'epoch': 1} {'type': 'loss', 'content': 0.14348414540290833, 'timestamp': '2025-09-10 02:38:44.542223', 'step': 4470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:44.597150', 'step': 4470, 'epoch': 1} {'type': 'loss', 'content': 0.24150770902633667, 'timestamp': '2025-09-10 02:38:44.599535', 'step': 4471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:44.654453', 'step': 4471, 'epoch': 1} {'type': 'loss', 'content': 0.12560686469078064, 'timestamp': '2025-09-10 02:38:44.661269', 'step': 4472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:44.716369', 'step': 4472, 'epoch': 1} {'type': 'loss', 'content': 0.21369248628616333, 'timestamp': '2025-09-10 02:38:44.718919', 'step': 4473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:44.775575', 'step': 4473, 'epoch': 1} {'type': 'loss', 'content': 0.07606607675552368, 'timestamp': '2025-09-10 02:38:44.777766', 'step': 4474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:44.832533', 'step': 4474, 'epoch': 1} {'type': 'loss', 'content': 0.21491779386997223, 'timestamp': '2025-09-10 02:38:44.834439', 'step': 4475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:44.888640', 'step': 4475, 'epoch': 1} {'type': 'loss', 'content': 0.17845548689365387, 'timestamp': '2025-09-10 02:38:44.894849', 'step': 4476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:44.951252', 'step': 4476, 'epoch': 1} {'type': 'loss', 'content': 0.125014990568161, 'timestamp': '2025-09-10 02:38:44.953571', 'step': 4477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:45.009572', 'step': 4477, 'epoch': 1} {'type': 'loss', 'content': 0.12755528092384338, 'timestamp': '2025-09-10 02:38:45.011791', 'step': 4478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:45.068754', 'step': 4478, 'epoch': 1} {'type': 'loss', 'content': 0.07891827076673508, 'timestamp': '2025-09-10 02:38:45.071313', 'step': 4479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:45.127867', 'step': 4479, 'epoch': 1} {'type': 'loss', 'content': 0.18909378349781036, 'timestamp': '2025-09-10 02:38:45.134604', 'step': 4480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:45.189450', 'step': 4480, 'epoch': 1} {'type': 'loss', 'content': 0.1953115016222, 'timestamp': '2025-09-10 02:38:45.192475', 'step': 4481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:45.247949', 'step': 4481, 'epoch': 1} {'type': 'loss', 'content': 0.18614999949932098, 'timestamp': '2025-09-10 02:38:45.250013', 'step': 4482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:45.303856', 'step': 4482, 'epoch': 1} {'type': 'loss', 'content': 0.12797054648399353, 'timestamp': '2025-09-10 02:38:45.305888', 'step': 4483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:45.358974', 'step': 4483, 'epoch': 1} {'type': 'loss', 'content': 0.151297464966774, 'timestamp': '2025-09-10 02:38:45.365021', 'step': 4484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:45.417974', 'step': 4484, 'epoch': 1} {'type': 'loss', 'content': 0.10522674769163132, 'timestamp': '2025-09-10 02:38:45.420070', 'step': 4485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:45.473348', 'step': 4485, 'epoch': 1} {'type': 'loss', 'content': 0.23502691090106964, 'timestamp': '2025-09-10 02:38:45.475574', 'step': 4486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:45.529626', 'step': 4486, 'epoch': 1} {'type': 'loss', 'content': 0.17224736511707306, 'timestamp': '2025-09-10 02:38:45.531818', 'step': 4487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:45.588116', 'step': 4487, 'epoch': 1} {'type': 'loss', 'content': 0.10425211489200592, 'timestamp': '2025-09-10 02:38:45.594416', 'step': 4488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:45.648136', 'step': 4488, 'epoch': 1} {'type': 'loss', 'content': 0.1523222029209137, 'timestamp': '2025-09-10 02:38:45.650630', 'step': 4489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:45.709709', 'step': 4489, 'epoch': 1} {'type': 'loss', 'content': 0.10608316957950592, 'timestamp': '2025-09-10 02:38:45.711896', 'step': 4490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:45.765275', 'step': 4490, 'epoch': 1} {'type': 'loss', 'content': 0.17367511987686157, 'timestamp': '2025-09-10 02:38:45.767355', 'step': 4491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:45.821087', 'step': 4491, 'epoch': 1} {'type': 'loss', 'content': 0.1859728991985321, 'timestamp': '2025-09-10 02:38:45.827123', 'step': 4492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:45.880139', 'step': 4492, 'epoch': 1} {'type': 'loss', 'content': 0.15191803872585297, 'timestamp': '2025-09-10 02:38:45.882162', 'step': 4493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:45.935408', 'step': 4493, 'epoch': 1} {'type': 'loss', 'content': 0.11334419250488281, 'timestamp': '2025-09-10 02:38:45.937510', 'step': 4494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:45.993688', 'step': 4494, 'epoch': 1} {'type': 'loss', 'content': 0.12499935179948807, 'timestamp': '2025-09-10 02:38:45.995934', 'step': 4495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:46.053229', 'step': 4495, 'epoch': 1} {'type': 'loss', 'content': 0.15410931408405304, 'timestamp': '2025-09-10 02:38:46.059568', 'step': 4496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:46.112704', 'step': 4496, 'epoch': 1} {'type': 'loss', 'content': 0.29922905564308167, 'timestamp': '2025-09-10 02:38:46.114752', 'step': 4497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:46.168591', 'step': 4497, 'epoch': 1} {'type': 'loss', 'content': 0.16208168864250183, 'timestamp': '2025-09-10 02:38:46.170604', 'step': 4498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:46.224142', 'step': 4498, 'epoch': 1} {'type': 'loss', 'content': 0.23509176075458527, 'timestamp': '2025-09-10 02:38:46.226183', 'step': 4499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:46.279136', 'step': 4499, 'epoch': 1} {'type': 'loss', 'content': 0.11865168064832687, 'timestamp': '2025-09-10 02:38:46.285218', 'step': 4500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 4500', 'timestamp': '2025-09-10 02:38:46.703902', 'step': 4500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:46.762952', 'step': 4500, 'epoch': 1} {'type': 'loss', 'content': 0.14468632638454437, 'timestamp': '2025-09-10 02:38:46.765312', 'step': 4501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:46.820507', 'step': 4501, 'epoch': 1} {'type': 'loss', 'content': 0.09979747235774994, 'timestamp': '2025-09-10 02:38:46.822963', 'step': 4502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:46.877999', 'step': 4502, 'epoch': 1} {'type': 'loss', 'content': 0.2681833505630493, 'timestamp': '2025-09-10 02:38:46.880430', 'step': 4503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:46.935964', 'step': 4503, 'epoch': 1} {'type': 'loss', 'content': 0.17562799155712128, 'timestamp': '2025-09-10 02:38:46.942053', 'step': 4504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:46.995411', 'step': 4504, 'epoch': 1} {'type': 'loss', 'content': 0.2765408754348755, 'timestamp': '2025-09-10 02:38:46.997497', 'step': 4505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:47.051687', 'step': 4505, 'epoch': 1} {'type': 'loss', 'content': 0.10382575541734695, 'timestamp': '2025-09-10 02:38:47.053793', 'step': 4506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:47.107908', 'step': 4506, 'epoch': 1} {'type': 'loss', 'content': 0.22309958934783936, 'timestamp': '2025-09-10 02:38:47.110021', 'step': 4507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:47.167577', 'step': 4507, 'epoch': 1} {'type': 'loss', 'content': 0.20943593978881836, 'timestamp': '2025-09-10 02:38:47.173957', 'step': 4508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:47.228183', 'step': 4508, 'epoch': 1} {'type': 'loss', 'content': 0.15933804214000702, 'timestamp': '2025-09-10 02:38:47.230338', 'step': 4509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:47.284121', 'step': 4509, 'epoch': 1} {'type': 'loss', 'content': 0.1783500611782074, 'timestamp': '2025-09-10 02:38:47.286172', 'step': 4510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:47.340320', 'step': 4510, 'epoch': 1} {'type': 'loss', 'content': 0.16742298007011414, 'timestamp': '2025-09-10 02:38:47.342599', 'step': 4511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:47.396895', 'step': 4511, 'epoch': 1} {'type': 'loss', 'content': 0.22550922632217407, 'timestamp': '2025-09-10 02:38:47.403402', 'step': 4512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:47.458453', 'step': 4512, 'epoch': 1} {'type': 'loss', 'content': 0.19421982765197754, 'timestamp': '2025-09-10 02:38:47.460733', 'step': 4513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:47.515612', 'step': 4513, 'epoch': 1} {'type': 'loss', 'content': 0.272169828414917, 'timestamp': '2025-09-10 02:38:47.517929', 'step': 4514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:47.573415', 'step': 4514, 'epoch': 1} {'type': 'loss', 'content': 0.1579587310552597, 'timestamp': '2025-09-10 02:38:47.575729', 'step': 4515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:47.631807', 'step': 4515, 'epoch': 1} {'type': 'loss', 'content': 0.1430816352367401, 'timestamp': '2025-09-10 02:38:47.638772', 'step': 4516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:47.694699', 'step': 4516, 'epoch': 1} {'type': 'loss', 'content': 0.1300605684518814, 'timestamp': '2025-09-10 02:38:47.696818', 'step': 4517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:47.753310', 'step': 4517, 'epoch': 1} {'type': 'loss', 'content': 0.1788092404603958, 'timestamp': '2025-09-10 02:38:47.755370', 'step': 4518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:47.809285', 'step': 4518, 'epoch': 1} {'type': 'loss', 'content': 0.1493673026561737, 'timestamp': '2025-09-10 02:38:47.811463', 'step': 4519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:47.864772', 'step': 4519, 'epoch': 1} {'type': 'loss', 'content': 0.15568462014198303, 'timestamp': '2025-09-10 02:38:47.870990', 'step': 4520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:47.925065', 'step': 4520, 'epoch': 1} {'type': 'loss', 'content': 0.1905600130558014, 'timestamp': '2025-09-10 02:38:47.927293', 'step': 4521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:47.983559', 'step': 4521, 'epoch': 1} {'type': 'loss', 'content': 0.17446266114711761, 'timestamp': '2025-09-10 02:38:47.985835', 'step': 4522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:48.041007', 'step': 4522, 'epoch': 1} {'type': 'loss', 'content': 0.11350682377815247, 'timestamp': '2025-09-10 02:38:48.043309', 'step': 4523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:48.097909', 'step': 4523, 'epoch': 1} {'type': 'loss', 'content': 0.16602835059165955, 'timestamp': '2025-09-10 02:38:48.104353', 'step': 4524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:48.158294', 'step': 4524, 'epoch': 1} {'type': 'loss', 'content': 0.1073649525642395, 'timestamp': '2025-09-10 02:38:48.160602', 'step': 4525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:48.217035', 'step': 4525, 'epoch': 1} {'type': 'loss', 'content': 0.16650275886058807, 'timestamp': '2025-09-10 02:38:48.219184', 'step': 4526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:48.273231', 'step': 4526, 'epoch': 1} {'type': 'loss', 'content': 0.16148759424686432, 'timestamp': '2025-09-10 02:38:48.275396', 'step': 4527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:48.329525', 'step': 4527, 'epoch': 1} {'type': 'loss', 'content': 0.13499635457992554, 'timestamp': '2025-09-10 02:38:48.335768', 'step': 4528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:48.389993', 'step': 4528, 'epoch': 1} {'type': 'loss', 'content': 0.18751969933509827, 'timestamp': '2025-09-10 02:38:48.391979', 'step': 4529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:48.445842', 'step': 4529, 'epoch': 1} {'type': 'loss', 'content': 0.1664564609527588, 'timestamp': '2025-09-10 02:38:48.448073', 'step': 4530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:48.506445', 'step': 4530, 'epoch': 1} {'type': 'loss', 'content': 0.15543398261070251, 'timestamp': '2025-09-10 02:38:48.508494', 'step': 4531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:48.562491', 'step': 4531, 'epoch': 1} {'type': 'loss', 'content': 0.16200174391269684, 'timestamp': '2025-09-10 02:38:48.568737', 'step': 4532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:48.625161', 'step': 4532, 'epoch': 1} {'type': 'loss', 'content': 0.14694568514823914, 'timestamp': '2025-09-10 02:38:48.626999', 'step': 4533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:48.683502', 'step': 4533, 'epoch': 1} {'type': 'loss', 'content': 0.3519456684589386, 'timestamp': '2025-09-10 02:38:48.685386', 'step': 4534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:48.742718', 'step': 4534, 'epoch': 1} {'type': 'loss', 'content': 0.18328817188739777, 'timestamp': '2025-09-10 02:38:48.744606', 'step': 4535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:48.802007', 'step': 4535, 'epoch': 1} {'type': 'loss', 'content': 0.1756710261106491, 'timestamp': '2025-09-10 02:38:48.808388', 'step': 4536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:48.863127', 'step': 4536, 'epoch': 1} {'type': 'loss', 'content': 0.19094623625278473, 'timestamp': '2025-09-10 02:38:48.865441', 'step': 4537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:48.919620', 'step': 4537, 'epoch': 1} {'type': 'loss', 'content': 0.19397461414337158, 'timestamp': '2025-09-10 02:38:48.921832', 'step': 4538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:48.977721', 'step': 4538, 'epoch': 1} {'type': 'loss', 'content': 0.15144242346286774, 'timestamp': '2025-09-10 02:38:48.979667', 'step': 4539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:49.034612', 'step': 4539, 'epoch': 1} {'type': 'loss', 'content': 0.18047469854354858, 'timestamp': '2025-09-10 02:38:49.040411', 'step': 4540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:49.095593', 'step': 4540, 'epoch': 1} {'type': 'loss', 'content': 0.20611517131328583, 'timestamp': '2025-09-10 02:38:49.097357', 'step': 4541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:49.152069', 'step': 4541, 'epoch': 1} {'type': 'loss', 'content': 0.11617463827133179, 'timestamp': '2025-09-10 02:38:49.153935', 'step': 4542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:49.208519', 'step': 4542, 'epoch': 1} {'type': 'loss', 'content': 0.24478858709335327, 'timestamp': '2025-09-10 02:38:49.210446', 'step': 4543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:49.265858', 'step': 4543, 'epoch': 1} {'type': 'loss', 'content': 0.10501844435930252, 'timestamp': '2025-09-10 02:38:49.271902', 'step': 4544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:49.326148', 'step': 4544, 'epoch': 1} {'type': 'loss', 'content': 0.2626506984233856, 'timestamp': '2025-09-10 02:38:49.328555', 'step': 4545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:49.382256', 'step': 4545, 'epoch': 1} {'type': 'loss', 'content': 0.09722618758678436, 'timestamp': '2025-09-10 02:38:49.384103', 'step': 4546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:49.439207', 'step': 4546, 'epoch': 1} {'type': 'loss', 'content': 0.13633140921592712, 'timestamp': '2025-09-10 02:38:49.441133', 'step': 4547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:49.495530', 'step': 4547, 'epoch': 1} {'type': 'loss', 'content': 0.1881372630596161, 'timestamp': '2025-09-10 02:38:49.501400', 'step': 4548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:49.555619', 'step': 4548, 'epoch': 1} {'type': 'loss', 'content': 0.15607112646102905, 'timestamp': '2025-09-10 02:38:49.557421', 'step': 4549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:49.610694', 'step': 4549, 'epoch': 1} {'type': 'loss', 'content': 0.17369240522384644, 'timestamp': '2025-09-10 02:38:49.612495', 'step': 4550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:49.668882', 'step': 4550, 'epoch': 1} {'type': 'loss', 'content': 0.05957410857081413, 'timestamp': '2025-09-10 02:38:49.670676', 'step': 4551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:49.726896', 'step': 4551, 'epoch': 1} {'type': 'loss', 'content': 0.1545393317937851, 'timestamp': '2025-09-10 02:38:49.732722', 'step': 4552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:49.787291', 'step': 4552, 'epoch': 1} {'type': 'loss', 'content': 0.13693389296531677, 'timestamp': '2025-09-10 02:38:49.789369', 'step': 4553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:49.853311', 'step': 4553, 'epoch': 1} {'type': 'loss', 'content': 0.15086176991462708, 'timestamp': '2025-09-10 02:38:49.855331', 'step': 4554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:49.911436', 'step': 4554, 'epoch': 1} {'type': 'loss', 'content': 0.18833200633525848, 'timestamp': '2025-09-10 02:38:49.913245', 'step': 4555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:49.968347', 'step': 4555, 'epoch': 1} {'type': 'loss', 'content': 0.08633961528539658, 'timestamp': '2025-09-10 02:38:49.974521', 'step': 4556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:50.027967', 'step': 4556, 'epoch': 1} {'type': 'loss', 'content': 0.1493109166622162, 'timestamp': '2025-09-10 02:38:50.030024', 'step': 4557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:50.083340', 'step': 4557, 'epoch': 1} {'type': 'loss', 'content': 0.10068447142839432, 'timestamp': '2025-09-10 02:38:50.085261', 'step': 4558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:50.138591', 'step': 4558, 'epoch': 1} {'type': 'loss', 'content': 0.11879348754882812, 'timestamp': '2025-09-10 02:38:50.140571', 'step': 4559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:50.195190', 'step': 4559, 'epoch': 1} {'type': 'loss', 'content': 0.14630524814128876, 'timestamp': '2025-09-10 02:38:50.201409', 'step': 4560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:50.257175', 'step': 4560, 'epoch': 1} {'type': 'loss', 'content': 0.12268050760030746, 'timestamp': '2025-09-10 02:38:50.259062', 'step': 4561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:50.313762', 'step': 4561, 'epoch': 1} {'type': 'loss', 'content': 0.14274543523788452, 'timestamp': '2025-09-10 02:38:50.315596', 'step': 4562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:50.371523', 'step': 4562, 'epoch': 1} {'type': 'loss', 'content': 0.12960144877433777, 'timestamp': '2025-09-10 02:38:50.373379', 'step': 4563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:50.426920', 'step': 4563, 'epoch': 1} {'type': 'loss', 'content': 0.17021380364894867, 'timestamp': '2025-09-10 02:38:50.432831', 'step': 4564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:50.486440', 'step': 4564, 'epoch': 1} {'type': 'loss', 'content': 0.1375453770160675, 'timestamp': '2025-09-10 02:38:50.488514', 'step': 4565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:50.544026', 'step': 4565, 'epoch': 1} {'type': 'loss', 'content': 0.09915599226951599, 'timestamp': '2025-09-10 02:38:50.546141', 'step': 4566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:50.599919', 'step': 4566, 'epoch': 1} {'type': 'loss', 'content': 0.2064734399318695, 'timestamp': '2025-09-10 02:38:50.601916', 'step': 4567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:50.655886', 'step': 4567, 'epoch': 1} {'type': 'loss', 'content': 0.1309647411108017, 'timestamp': '2025-09-10 02:38:50.662106', 'step': 4568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:50.720659', 'step': 4568, 'epoch': 1} {'type': 'loss', 'content': 0.19457072019577026, 'timestamp': '2025-09-10 02:38:50.722878', 'step': 4569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:50.776828', 'step': 4569, 'epoch': 1} {'type': 'loss', 'content': 0.22946694493293762, 'timestamp': '2025-09-10 02:38:50.778887', 'step': 4570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:50.833444', 'step': 4570, 'epoch': 1} {'type': 'loss', 'content': 0.1644226461648941, 'timestamp': '2025-09-10 02:38:50.835639', 'step': 4571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:50.890126', 'step': 4571, 'epoch': 1} {'type': 'loss', 'content': 0.17282575368881226, 'timestamp': '2025-09-10 02:38:50.896867', 'step': 4572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:50.949907', 'step': 4572, 'epoch': 1} {'type': 'loss', 'content': 0.18970470130443573, 'timestamp': '2025-09-10 02:38:50.952241', 'step': 4573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:51.005489', 'step': 4573, 'epoch': 1} {'type': 'loss', 'content': 0.10358081012964249, 'timestamp': '2025-09-10 02:38:51.007863', 'step': 4574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:51.062208', 'step': 4574, 'epoch': 1} {'type': 'loss', 'content': 0.1780926138162613, 'timestamp': '2025-09-10 02:38:51.064302', 'step': 4575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:51.117872', 'step': 4575, 'epoch': 1} {'type': 'loss', 'content': 0.1855391561985016, 'timestamp': '2025-09-10 02:38:51.124179', 'step': 4576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:51.177071', 'step': 4576, 'epoch': 1} {'type': 'loss', 'content': 0.19895565509796143, 'timestamp': '2025-09-10 02:38:51.179356', 'step': 4577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:51.239070', 'step': 4577, 'epoch': 1} {'type': 'loss', 'content': 0.19878637790679932, 'timestamp': '2025-09-10 02:38:51.241145', 'step': 4578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:51.295616', 'step': 4578, 'epoch': 1} {'type': 'loss', 'content': 0.09082543104887009, 'timestamp': '2025-09-10 02:38:51.297721', 'step': 4579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:51.355508', 'step': 4579, 'epoch': 1} {'type': 'loss', 'content': 0.13585449755191803, 'timestamp': '2025-09-10 02:38:51.361885', 'step': 4580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:51.419859', 'step': 4580, 'epoch': 1} {'type': 'loss', 'content': 0.15708015859127045, 'timestamp': '2025-09-10 02:38:51.421969', 'step': 4581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:51.495460', 'step': 4581, 'epoch': 1} {'type': 'loss', 'content': 0.16015416383743286, 'timestamp': '2025-09-10 02:38:51.497523', 'step': 4582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:51.551306', 'step': 4582, 'epoch': 1} {'type': 'loss', 'content': 0.2613103687763214, 'timestamp': '2025-09-10 02:38:51.553510', 'step': 4583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:51.606926', 'step': 4583, 'epoch': 1} {'type': 'loss', 'content': 0.10991068184375763, 'timestamp': '2025-09-10 02:38:51.613102', 'step': 4584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:51.668176', 'step': 4584, 'epoch': 1} {'type': 'loss', 'content': 0.20168165862560272, 'timestamp': '2025-09-10 02:38:51.670305', 'step': 4585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:51.725004', 'step': 4585, 'epoch': 1} {'type': 'loss', 'content': 0.13263747096061707, 'timestamp': '2025-09-10 02:38:51.727091', 'step': 4586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:51.780937', 'step': 4586, 'epoch': 1} {'type': 'loss', 'content': 0.11120821535587311, 'timestamp': '2025-09-10 02:38:51.783045', 'step': 4587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:51.837434', 'step': 4587, 'epoch': 1} {'type': 'loss', 'content': 0.20554453134536743, 'timestamp': '2025-09-10 02:38:51.844015', 'step': 4588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:51.897957', 'step': 4588, 'epoch': 1} {'type': 'loss', 'content': 0.11330955475568771, 'timestamp': '2025-09-10 02:38:51.900233', 'step': 4589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:51.955127', 'step': 4589, 'epoch': 1} {'type': 'loss', 'content': 0.12181521952152252, 'timestamp': '2025-09-10 02:38:51.957210', 'step': 4590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:52.012187', 'step': 4590, 'epoch': 1} {'type': 'loss', 'content': 0.13632731139659882, 'timestamp': '2025-09-10 02:38:52.014476', 'step': 4591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:52.069210', 'step': 4591, 'epoch': 1} {'type': 'loss', 'content': 0.21318678557872772, 'timestamp': '2025-09-10 02:38:52.075553', 'step': 4592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:52.129870', 'step': 4592, 'epoch': 1} {'type': 'loss', 'content': 0.16523653268814087, 'timestamp': '2025-09-10 02:38:52.132079', 'step': 4593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:52.186307', 'step': 4593, 'epoch': 1} {'type': 'loss', 'content': 0.22930793464183807, 'timestamp': '2025-09-10 02:38:52.188310', 'step': 4594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:52.241717', 'step': 4594, 'epoch': 1} {'type': 'loss', 'content': 0.15619701147079468, 'timestamp': '2025-09-10 02:38:52.243938', 'step': 4595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:52.297439', 'step': 4595, 'epoch': 1} {'type': 'loss', 'content': 0.11262192577123642, 'timestamp': '2025-09-10 02:38:52.303561', 'step': 4596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:52.356603', 'step': 4596, 'epoch': 1} {'type': 'loss', 'content': 0.17981751263141632, 'timestamp': '2025-09-10 02:38:52.358580', 'step': 4597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:52.411723', 'step': 4597, 'epoch': 1} {'type': 'loss', 'content': 0.08506461977958679, 'timestamp': '2025-09-10 02:38:52.413916', 'step': 4598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:52.468278', 'step': 4598, 'epoch': 1} {'type': 'loss', 'content': 0.1436329185962677, 'timestamp': '2025-09-10 02:38:52.470397', 'step': 4599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:52.523900', 'step': 4599, 'epoch': 1} {'type': 'loss', 'content': 0.19423891603946686, 'timestamp': '2025-09-10 02:38:52.530113', 'step': 4600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:52.583763', 'step': 4600, 'epoch': 1} {'type': 'loss', 'content': 0.17794331908226013, 'timestamp': '2025-09-10 02:38:52.585780', 'step': 4601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:52.639177', 'step': 4601, 'epoch': 1} {'type': 'loss', 'content': 0.14253520965576172, 'timestamp': '2025-09-10 02:38:52.641596', 'step': 4602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:52.695411', 'step': 4602, 'epoch': 1} {'type': 'loss', 'content': 0.19233882427215576, 'timestamp': '2025-09-10 02:38:52.697682', 'step': 4603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:52.751238', 'step': 4603, 'epoch': 1} {'type': 'loss', 'content': 0.13572195172309875, 'timestamp': '2025-09-10 02:38:52.757404', 'step': 4604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:52.811087', 'step': 4604, 'epoch': 1} {'type': 'loss', 'content': 0.1379954218864441, 'timestamp': '2025-09-10 02:38:52.813499', 'step': 4605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:52.867529', 'step': 4605, 'epoch': 1} {'type': 'loss', 'content': 0.151034876704216, 'timestamp': '2025-09-10 02:38:52.869606', 'step': 4606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:52.924077', 'step': 4606, 'epoch': 1} {'type': 'loss', 'content': 0.12190122157335281, 'timestamp': '2025-09-10 02:38:52.926229', 'step': 4607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:52.980827', 'step': 4607, 'epoch': 1} {'type': 'loss', 'content': 0.14389440417289734, 'timestamp': '2025-09-10 02:38:52.987305', 'step': 4608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:53.041782', 'step': 4608, 'epoch': 1} {'type': 'loss', 'content': 0.1452893167734146, 'timestamp': '2025-09-10 02:38:53.043845', 'step': 4609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:53.100158', 'step': 4609, 'epoch': 1} {'type': 'loss', 'content': 0.15754123032093048, 'timestamp': '2025-09-10 02:38:53.102741', 'step': 4610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:53.157160', 'step': 4610, 'epoch': 1} {'type': 'loss', 'content': 0.15468180179595947, 'timestamp': '2025-09-10 02:38:53.159293', 'step': 4611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:53.212763', 'step': 4611, 'epoch': 1} {'type': 'loss', 'content': 0.1050575003027916, 'timestamp': '2025-09-10 02:38:53.218933', 'step': 4612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:53.272224', 'step': 4612, 'epoch': 1} {'type': 'loss', 'content': 0.28581464290618896, 'timestamp': '2025-09-10 02:38:53.274782', 'step': 4613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:53.328710', 'step': 4613, 'epoch': 1} {'type': 'loss', 'content': 0.1475469321012497, 'timestamp': '2025-09-10 02:38:53.334143', 'step': 4614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:53.394830', 'step': 4614, 'epoch': 1} {'type': 'loss', 'content': 0.1261664181947708, 'timestamp': '2025-09-10 02:38:53.396988', 'step': 4615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:53.451330', 'step': 4615, 'epoch': 1} {'type': 'loss', 'content': 0.1760391741991043, 'timestamp': '2025-09-10 02:38:53.458510', 'step': 4616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:53.528481', 'step': 4616, 'epoch': 1} {'type': 'loss', 'content': 0.12695355713367462, 'timestamp': '2025-09-10 02:38:53.531420', 'step': 4617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:53.587129', 'step': 4617, 'epoch': 1} {'type': 'loss', 'content': 0.10908544063568115, 'timestamp': '2025-09-10 02:38:53.589602', 'step': 4618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:53.644713', 'step': 4618, 'epoch': 1} {'type': 'loss', 'content': 0.14998313784599304, 'timestamp': '2025-09-10 02:38:53.647098', 'step': 4619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:53.702001', 'step': 4619, 'epoch': 1} {'type': 'loss', 'content': 0.24273207783699036, 'timestamp': '2025-09-10 02:38:53.711830', 'step': 4620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:53.772664', 'step': 4620, 'epoch': 1} {'type': 'loss', 'content': 0.14634650945663452, 'timestamp': '2025-09-10 02:38:53.782571', 'step': 4621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:53.839537', 'step': 4621, 'epoch': 1} {'type': 'loss', 'content': 0.13281545042991638, 'timestamp': '2025-09-10 02:38:53.841703', 'step': 4622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:53.897829', 'step': 4622, 'epoch': 1} {'type': 'loss', 'content': 0.13227154314517975, 'timestamp': '2025-09-10 02:38:53.900123', 'step': 4623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:53.956432', 'step': 4623, 'epoch': 1} {'type': 'loss', 'content': 0.16314128041267395, 'timestamp': '2025-09-10 02:38:53.962815', 'step': 4624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:54.016750', 'step': 4624, 'epoch': 1} {'type': 'loss', 'content': 0.41573137044906616, 'timestamp': '2025-09-10 02:38:54.018982', 'step': 4625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:54.074481', 'step': 4625, 'epoch': 1} {'type': 'loss', 'content': 0.20759643614292145, 'timestamp': '2025-09-10 02:38:54.076649', 'step': 4626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:38:54.137035', 'step': 4626, 'epoch': 1} {'type': 'loss', 'content': 0.19870415329933167, 'timestamp': '2025-09-10 02:38:54.139336', 'step': 4627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:54.196388', 'step': 4627, 'epoch': 1} {'type': 'loss', 'content': 0.08952096849679947, 'timestamp': '2025-09-10 02:38:54.202873', 'step': 4628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:54.268332', 'step': 4628, 'epoch': 1} {'type': 'loss', 'content': 0.21546374261379242, 'timestamp': '2025-09-10 02:38:54.271580', 'step': 4629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:54.328031', 'step': 4629, 'epoch': 1} {'type': 'loss', 'content': 0.1934456080198288, 'timestamp': '2025-09-10 02:38:54.330348', 'step': 4630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:54.384334', 'step': 4630, 'epoch': 1} {'type': 'loss', 'content': 0.11716896295547485, 'timestamp': '2025-09-10 02:38:54.389965', 'step': 4631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:54.447528', 'step': 4631, 'epoch': 1} {'type': 'loss', 'content': 0.15858256816864014, 'timestamp': '2025-09-10 02:38:54.453842', 'step': 4632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:54.513990', 'step': 4632, 'epoch': 1} {'type': 'loss', 'content': 0.18281413614749908, 'timestamp': '2025-09-10 02:38:54.516308', 'step': 4633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:54.571183', 'step': 4633, 'epoch': 1} {'type': 'loss', 'content': 0.1427699625492096, 'timestamp': '2025-09-10 02:38:54.573640', 'step': 4634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:54.631135', 'step': 4634, 'epoch': 1} {'type': 'loss', 'content': 0.1519901305437088, 'timestamp': '2025-09-10 02:38:54.633564', 'step': 4635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:54.690445', 'step': 4635, 'epoch': 1} {'type': 'loss', 'content': 0.1376418173313141, 'timestamp': '2025-09-10 02:38:54.697341', 'step': 4636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:54.752474', 'step': 4636, 'epoch': 1} {'type': 'loss', 'content': 0.22700245678424835, 'timestamp': '2025-09-10 02:38:54.754698', 'step': 4637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:54.809422', 'step': 4637, 'epoch': 1} {'type': 'loss', 'content': 0.1785791516304016, 'timestamp': '2025-09-10 02:38:54.811730', 'step': 4638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:54.865375', 'step': 4638, 'epoch': 1} {'type': 'loss', 'content': 0.2680281400680542, 'timestamp': '2025-09-10 02:38:54.868185', 'step': 4639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:54.922462', 'step': 4639, 'epoch': 1} {'type': 'loss', 'content': 0.2282664030790329, 'timestamp': '2025-09-10 02:38:54.928839', 'step': 4640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:54.988909', 'step': 4640, 'epoch': 1} {'type': 'loss', 'content': 0.15698543190956116, 'timestamp': '2025-09-10 02:38:54.991075', 'step': 4641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:55.045464', 'step': 4641, 'epoch': 1} {'type': 'loss', 'content': 0.1943107396364212, 'timestamp': '2025-09-10 02:38:55.047618', 'step': 4642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:55.103954', 'step': 4642, 'epoch': 1} {'type': 'loss', 'content': 0.08715858310461044, 'timestamp': '2025-09-10 02:38:55.106380', 'step': 4643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:55.162948', 'step': 4643, 'epoch': 1} {'type': 'loss', 'content': 0.224721759557724, 'timestamp': '2025-09-10 02:38:55.172145', 'step': 4644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:55.232602', 'step': 4644, 'epoch': 1} {'type': 'loss', 'content': 0.16574470698833466, 'timestamp': '2025-09-10 02:38:55.234903', 'step': 4645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:55.290203', 'step': 4645, 'epoch': 1} {'type': 'loss', 'content': 0.19195058941841125, 'timestamp': '2025-09-10 02:38:55.292546', 'step': 4646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:55.346898', 'step': 4646, 'epoch': 1} {'type': 'loss', 'content': 0.17171555757522583, 'timestamp': '2025-09-10 02:38:55.349116', 'step': 4647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:55.403473', 'step': 4647, 'epoch': 1} {'type': 'loss', 'content': 0.13856364786624908, 'timestamp': '2025-09-10 02:38:55.412175', 'step': 4648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:55.468608', 'step': 4648, 'epoch': 1} {'type': 'loss', 'content': 0.21269677579402924, 'timestamp': '2025-09-10 02:38:55.470802', 'step': 4649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:55.524639', 'step': 4649, 'epoch': 1} {'type': 'loss', 'content': 0.18499936163425446, 'timestamp': '2025-09-10 02:38:55.526864', 'step': 4650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:55.585506', 'step': 4650, 'epoch': 1} {'type': 'loss', 'content': 0.2857759892940521, 'timestamp': '2025-09-10 02:38:55.587792', 'step': 4651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:38:55.646489', 'step': 4651, 'epoch': 1} {'type': 'loss', 'content': 0.10255894064903259, 'timestamp': '2025-09-10 02:38:55.653337', 'step': 4652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:55.709195', 'step': 4652, 'epoch': 1} {'type': 'loss', 'content': 0.18233737349510193, 'timestamp': '2025-09-10 02:38:55.711407', 'step': 4653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:55.774827', 'step': 4653, 'epoch': 1} {'type': 'loss', 'content': 0.2497934103012085, 'timestamp': '2025-09-10 02:38:55.777096', 'step': 4654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:55.832671', 'step': 4654, 'epoch': 1} {'type': 'loss', 'content': 0.16939319670200348, 'timestamp': '2025-09-10 02:38:55.835195', 'step': 4655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:55.893246', 'step': 4655, 'epoch': 1} {'type': 'loss', 'content': 0.128335639834404, 'timestamp': '2025-09-10 02:38:55.899936', 'step': 4656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:55.957298', 'step': 4656, 'epoch': 1} {'type': 'loss', 'content': 0.2128259837627411, 'timestamp': '2025-09-10 02:38:55.959566', 'step': 4657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:38:56.022510', 'step': 4657, 'epoch': 1} {'type': 'loss', 'content': 0.21150602400302887, 'timestamp': '2025-09-10 02:38:56.024909', 'step': 4658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:56.083659', 'step': 4658, 'epoch': 1} {'type': 'loss', 'content': 0.15369854867458344, 'timestamp': '2025-09-10 02:38:56.085948', 'step': 4659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:38:56.147547', 'step': 4659, 'epoch': 1} {'type': 'loss', 'content': 0.24156951904296875, 'timestamp': '2025-09-10 02:38:56.154961', 'step': 4660, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:39:09.006613', 'step': 4660, 'epoch': 1} {'type': 'pplx', 'content': 10682.406030238879, 'timestamp': '2025-09-10 02:39:09.009745', 'step': 4660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:09.063546', 'step': 4660, 'epoch': 1} {'type': 'loss', 'content': 0.1392248570919037, 'timestamp': '2025-09-10 02:39:09.065979', 'step': 4661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:09.119935', 'step': 4661, 'epoch': 1} {'type': 'loss', 'content': 0.17190860211849213, 'timestamp': '2025-09-10 02:39:09.122094', 'step': 4662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:09.175339', 'step': 4662, 'epoch': 1} {'type': 'loss', 'content': 0.18469810485839844, 'timestamp': '2025-09-10 02:39:09.177554', 'step': 4663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:09.231365', 'step': 4663, 'epoch': 1} {'type': 'loss', 'content': 0.13946154713630676, 'timestamp': '2025-09-10 02:39:09.237680', 'step': 4664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:09.290245', 'step': 4664, 'epoch': 1} {'type': 'loss', 'content': 0.09870117157697678, 'timestamp': '2025-09-10 02:39:09.292546', 'step': 4665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:09.345672', 'step': 4665, 'epoch': 1} {'type': 'loss', 'content': 0.23125052452087402, 'timestamp': '2025-09-10 02:39:09.347972', 'step': 4666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:09.407497', 'step': 4666, 'epoch': 1} {'type': 'loss', 'content': 0.15221086144447327, 'timestamp': '2025-09-10 02:39:09.409779', 'step': 4667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:09.464071', 'step': 4667, 'epoch': 1} {'type': 'loss', 'content': 0.25159066915512085, 'timestamp': '2025-09-10 02:39:09.470187', 'step': 4668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:09.523211', 'step': 4668, 'epoch': 1} {'type': 'loss', 'content': 0.15416109561920166, 'timestamp': '2025-09-10 02:39:09.525686', 'step': 4669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:09.580631', 'step': 4669, 'epoch': 1} {'type': 'loss', 'content': 0.20998883247375488, 'timestamp': '2025-09-10 02:39:09.584723', 'step': 4670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:09.639608', 'step': 4670, 'epoch': 1} {'type': 'loss', 'content': 0.19267094135284424, 'timestamp': '2025-09-10 02:39:09.641890', 'step': 4671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:09.696461', 'step': 4671, 'epoch': 1} {'type': 'loss', 'content': 0.20297347009181976, 'timestamp': '2025-09-10 02:39:09.702524', 'step': 4672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:09.755427', 'step': 4672, 'epoch': 1} {'type': 'loss', 'content': 0.15542206168174744, 'timestamp': '2025-09-10 02:39:09.757514', 'step': 4673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:09.810454', 'step': 4673, 'epoch': 1} {'type': 'loss', 'content': 0.15965937077999115, 'timestamp': '2025-09-10 02:39:09.812507', 'step': 4674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:09.865682', 'step': 4674, 'epoch': 1} {'type': 'loss', 'content': 0.1913837194442749, 'timestamp': '2025-09-10 02:39:09.867722', 'step': 4675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:09.921486', 'step': 4675, 'epoch': 1} {'type': 'loss', 'content': 0.1934303492307663, 'timestamp': '2025-09-10 02:39:09.927671', 'step': 4676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:09.980621', 'step': 4676, 'epoch': 1} {'type': 'loss', 'content': 0.24311107397079468, 'timestamp': '2025-09-10 02:39:09.982890', 'step': 4677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:10.036229', 'step': 4677, 'epoch': 1} {'type': 'loss', 'content': 0.143010213971138, 'timestamp': '2025-09-10 02:39:10.038499', 'step': 4678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:10.092033', 'step': 4678, 'epoch': 1} {'type': 'loss', 'content': 0.20463360846042633, 'timestamp': '2025-09-10 02:39:10.094288', 'step': 4679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:10.148189', 'step': 4679, 'epoch': 1} {'type': 'loss', 'content': 0.19229647517204285, 'timestamp': '2025-09-10 02:39:10.154304', 'step': 4680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:10.207002', 'step': 4680, 'epoch': 1} {'type': 'loss', 'content': 0.12273625284433365, 'timestamp': '2025-09-10 02:39:10.209234', 'step': 4681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:10.263196', 'step': 4681, 'epoch': 1} {'type': 'loss', 'content': 0.18213094770908356, 'timestamp': '2025-09-10 02:39:10.265475', 'step': 4682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:10.319162', 'step': 4682, 'epoch': 1} {'type': 'loss', 'content': 0.11015338450670242, 'timestamp': '2025-09-10 02:39:10.321553', 'step': 4683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:10.375739', 'step': 4683, 'epoch': 1} {'type': 'loss', 'content': 0.23970124125480652, 'timestamp': '2025-09-10 02:39:10.381851', 'step': 4684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:10.434840', 'step': 4684, 'epoch': 1} {'type': 'loss', 'content': 0.1674158275127411, 'timestamp': '2025-09-10 02:39:10.437156', 'step': 4685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:10.493672', 'step': 4685, 'epoch': 1} {'type': 'loss', 'content': 0.17437875270843506, 'timestamp': '2025-09-10 02:39:10.495942', 'step': 4686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:10.549541', 'step': 4686, 'epoch': 1} {'type': 'loss', 'content': 0.1698790043592453, 'timestamp': '2025-09-10 02:39:10.551787', 'step': 4687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:10.606534', 'step': 4687, 'epoch': 1} {'type': 'loss', 'content': 0.11471407115459442, 'timestamp': '2025-09-10 02:39:10.612751', 'step': 4688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:10.666514', 'step': 4688, 'epoch': 1} {'type': 'loss', 'content': 0.0553913339972496, 'timestamp': '2025-09-10 02:39:10.668832', 'step': 4689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:10.722319', 'step': 4689, 'epoch': 1} {'type': 'loss', 'content': 0.10378911346197128, 'timestamp': '2025-09-10 02:39:10.724368', 'step': 4690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:10.777682', 'step': 4690, 'epoch': 1} {'type': 'loss', 'content': 0.20472420752048492, 'timestamp': '2025-09-10 02:39:10.779912', 'step': 4691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:10.833324', 'step': 4691, 'epoch': 1} {'type': 'loss', 'content': 0.10134246945381165, 'timestamp': '2025-09-10 02:39:10.839239', 'step': 4692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:10.893314', 'step': 4692, 'epoch': 1} {'type': 'loss', 'content': 0.11107414960861206, 'timestamp': '2025-09-10 02:39:10.895702', 'step': 4693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:10.949513', 'step': 4693, 'epoch': 1} {'type': 'loss', 'content': 0.1843392699956894, 'timestamp': '2025-09-10 02:39:10.951783', 'step': 4694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:11.005661', 'step': 4694, 'epoch': 1} {'type': 'loss', 'content': 0.17596986889839172, 'timestamp': '2025-09-10 02:39:11.007773', 'step': 4695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:11.061893', 'step': 4695, 'epoch': 1} {'type': 'loss', 'content': 0.147801011800766, 'timestamp': '2025-09-10 02:39:11.067869', 'step': 4696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:11.120260', 'step': 4696, 'epoch': 1} {'type': 'loss', 'content': 0.0957551971077919, 'timestamp': '2025-09-10 02:39:11.123952', 'step': 4697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:11.178775', 'step': 4697, 'epoch': 1} {'type': 'loss', 'content': 0.22275227308273315, 'timestamp': '2025-09-10 02:39:11.181289', 'step': 4698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:11.235835', 'step': 4698, 'epoch': 1} {'type': 'loss', 'content': 0.14291928708553314, 'timestamp': '2025-09-10 02:39:11.243979', 'step': 4699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:11.300526', 'step': 4699, 'epoch': 1} {'type': 'loss', 'content': 0.23139701783657074, 'timestamp': '2025-09-10 02:39:11.309540', 'step': 4700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:11.363616', 'step': 4700, 'epoch': 1} {'type': 'loss', 'content': 0.15763826668262482, 'timestamp': '2025-09-10 02:39:11.365785', 'step': 4701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:11.421337', 'step': 4701, 'epoch': 1} {'type': 'loss', 'content': 0.12728199362754822, 'timestamp': '2025-09-10 02:39:11.423422', 'step': 4702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:11.476939', 'step': 4702, 'epoch': 1} {'type': 'loss', 'content': 0.12096337974071503, 'timestamp': '2025-09-10 02:39:11.485401', 'step': 4703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:11.543206', 'step': 4703, 'epoch': 1} {'type': 'loss', 'content': 0.183461993932724, 'timestamp': '2025-09-10 02:39:11.552189', 'step': 4704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:11.606081', 'step': 4704, 'epoch': 1} {'type': 'loss', 'content': 0.10556637495756149, 'timestamp': '2025-09-10 02:39:11.608357', 'step': 4705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:11.661077', 'step': 4705, 'epoch': 1} {'type': 'loss', 'content': 0.11827829480171204, 'timestamp': '2025-09-10 02:39:11.663377', 'step': 4706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:11.719253', 'step': 4706, 'epoch': 1} {'type': 'loss', 'content': 0.16596554219722748, 'timestamp': '2025-09-10 02:39:11.721568', 'step': 4707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:11.776135', 'step': 4707, 'epoch': 1} {'type': 'loss', 'content': 0.1459335833787918, 'timestamp': '2025-09-10 02:39:11.782203', 'step': 4708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:11.836275', 'step': 4708, 'epoch': 1} {'type': 'loss', 'content': 0.1401311457157135, 'timestamp': '2025-09-10 02:39:11.838310', 'step': 4709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:11.902439', 'step': 4709, 'epoch': 1} {'type': 'loss', 'content': 0.14694665372371674, 'timestamp': '2025-09-10 02:39:11.904612', 'step': 4710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:11.957931', 'step': 4710, 'epoch': 1} {'type': 'loss', 'content': 0.1772001087665558, 'timestamp': '2025-09-10 02:39:11.960050', 'step': 4711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:12.013801', 'step': 4711, 'epoch': 1} {'type': 'loss', 'content': 0.1496148407459259, 'timestamp': '2025-09-10 02:39:12.020013', 'step': 4712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:12.072603', 'step': 4712, 'epoch': 1} {'type': 'loss', 'content': 0.1580457240343094, 'timestamp': '2025-09-10 02:39:12.074988', 'step': 4713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:12.128438', 'step': 4713, 'epoch': 1} {'type': 'loss', 'content': 0.38996753096580505, 'timestamp': '2025-09-10 02:39:12.130533', 'step': 4714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:12.183976', 'step': 4714, 'epoch': 1} {'type': 'loss', 'content': 0.22523337602615356, 'timestamp': '2025-09-10 02:39:12.186288', 'step': 4715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:12.242354', 'step': 4715, 'epoch': 1} {'type': 'loss', 'content': 0.17267148196697235, 'timestamp': '2025-09-10 02:39:12.248458', 'step': 4716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:12.301967', 'step': 4716, 'epoch': 1} {'type': 'loss', 'content': 0.18524324893951416, 'timestamp': '2025-09-10 02:39:12.304283', 'step': 4717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:12.358378', 'step': 4717, 'epoch': 1} {'type': 'loss', 'content': 0.13094738125801086, 'timestamp': '2025-09-10 02:39:12.361592', 'step': 4718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:12.415537', 'step': 4718, 'epoch': 1} {'type': 'loss', 'content': 0.1800270974636078, 'timestamp': '2025-09-10 02:39:12.417805', 'step': 4719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:12.472209', 'step': 4719, 'epoch': 1} {'type': 'loss', 'content': 0.07504362612962723, 'timestamp': '2025-09-10 02:39:12.478351', 'step': 4720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:12.532514', 'step': 4720, 'epoch': 1} {'type': 'loss', 'content': 0.21429632604122162, 'timestamp': '2025-09-10 02:39:12.534807', 'step': 4721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:12.589700', 'step': 4721, 'epoch': 1} {'type': 'loss', 'content': 0.12010780721902847, 'timestamp': '2025-09-10 02:39:12.592158', 'step': 4722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:12.648953', 'step': 4722, 'epoch': 1} {'type': 'loss', 'content': 0.1771569848060608, 'timestamp': '2025-09-10 02:39:12.651408', 'step': 4723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:12.705675', 'step': 4723, 'epoch': 1} {'type': 'loss', 'content': 0.1327444165945053, 'timestamp': '2025-09-10 02:39:12.711863', 'step': 4724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:12.766349', 'step': 4724, 'epoch': 1} {'type': 'loss', 'content': 0.12982966005802155, 'timestamp': '2025-09-10 02:39:12.768600', 'step': 4725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:12.822519', 'step': 4725, 'epoch': 1} {'type': 'loss', 'content': 0.28885456919670105, 'timestamp': '2025-09-10 02:39:12.825071', 'step': 4726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:12.880802', 'step': 4726, 'epoch': 1} {'type': 'loss', 'content': 0.21497657895088196, 'timestamp': '2025-09-10 02:39:12.883150', 'step': 4727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:12.937624', 'step': 4727, 'epoch': 1} {'type': 'loss', 'content': 0.23923464119434357, 'timestamp': '2025-09-10 02:39:12.944013', 'step': 4728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:12.997395', 'step': 4728, 'epoch': 1} {'type': 'loss', 'content': 0.06966880708932877, 'timestamp': '2025-09-10 02:39:12.999643', 'step': 4729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:13.054236', 'step': 4729, 'epoch': 1} {'type': 'loss', 'content': 0.13775299489498138, 'timestamp': '2025-09-10 02:39:13.056520', 'step': 4730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:13.111185', 'step': 4730, 'epoch': 1} {'type': 'loss', 'content': 0.18404382467269897, 'timestamp': '2025-09-10 02:39:13.113444', 'step': 4731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:13.167134', 'step': 4731, 'epoch': 1} {'type': 'loss', 'content': 0.18868786096572876, 'timestamp': '2025-09-10 02:39:13.173383', 'step': 4732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:13.227599', 'step': 4732, 'epoch': 1} {'type': 'loss', 'content': 0.15267816185951233, 'timestamp': '2025-09-10 02:39:13.229840', 'step': 4733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:13.284178', 'step': 4733, 'epoch': 1} {'type': 'loss', 'content': 0.16304650902748108, 'timestamp': '2025-09-10 02:39:13.286462', 'step': 4734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:13.340910', 'step': 4734, 'epoch': 1} {'type': 'loss', 'content': 0.23727385699748993, 'timestamp': '2025-09-10 02:39:13.343185', 'step': 4735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:13.398031', 'step': 4735, 'epoch': 1} {'type': 'loss', 'content': 0.14919638633728027, 'timestamp': '2025-09-10 02:39:13.404201', 'step': 4736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:13.457226', 'step': 4736, 'epoch': 1} {'type': 'loss', 'content': 0.1280076801776886, 'timestamp': '2025-09-10 02:39:13.459771', 'step': 4737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:13.515099', 'step': 4737, 'epoch': 1} {'type': 'loss', 'content': 0.17248904705047607, 'timestamp': '2025-09-10 02:39:13.517474', 'step': 4738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:13.570919', 'step': 4738, 'epoch': 1} {'type': 'loss', 'content': 0.19424879550933838, 'timestamp': '2025-09-10 02:39:13.573082', 'step': 4739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:13.629845', 'step': 4739, 'epoch': 1} {'type': 'loss', 'content': 0.12963761389255524, 'timestamp': '2025-09-10 02:39:13.639327', 'step': 4740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:13.693482', 'step': 4740, 'epoch': 1} {'type': 'loss', 'content': 0.2234785258769989, 'timestamp': '2025-09-10 02:39:13.695831', 'step': 4741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:13.756521', 'step': 4741, 'epoch': 1} {'type': 'loss', 'content': 0.10613813251256943, 'timestamp': '2025-09-10 02:39:13.758944', 'step': 4742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:13.812961', 'step': 4742, 'epoch': 1} {'type': 'loss', 'content': 0.1873808205127716, 'timestamp': '2025-09-10 02:39:13.815248', 'step': 4743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:13.871446', 'step': 4743, 'epoch': 1} {'type': 'loss', 'content': 0.10816004127264023, 'timestamp': '2025-09-10 02:39:13.877840', 'step': 4744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:13.931451', 'step': 4744, 'epoch': 1} {'type': 'loss', 'content': 0.08876681327819824, 'timestamp': '2025-09-10 02:39:13.933713', 'step': 4745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:13.988019', 'step': 4745, 'epoch': 1} {'type': 'loss', 'content': 0.25645384192466736, 'timestamp': '2025-09-10 02:39:13.992174', 'step': 4746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:14.046782', 'step': 4746, 'epoch': 1} {'type': 'loss', 'content': 0.2624112665653229, 'timestamp': '2025-09-10 02:39:14.049044', 'step': 4747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:14.102220', 'step': 4747, 'epoch': 1} {'type': 'loss', 'content': 0.10764046758413315, 'timestamp': '2025-09-10 02:39:14.108009', 'step': 4748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:14.163194', 'step': 4748, 'epoch': 1} {'type': 'loss', 'content': 0.19712147116661072, 'timestamp': '2025-09-10 02:39:14.165410', 'step': 4749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:14.219626', 'step': 4749, 'epoch': 1} {'type': 'loss', 'content': 0.1387161910533905, 'timestamp': '2025-09-10 02:39:14.221861', 'step': 4750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:14.276202', 'step': 4750, 'epoch': 1} {'type': 'loss', 'content': 0.2224498987197876, 'timestamp': '2025-09-10 02:39:14.278435', 'step': 4751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:14.333429', 'step': 4751, 'epoch': 1} {'type': 'loss', 'content': 0.18779698014259338, 'timestamp': '2025-09-10 02:39:14.339714', 'step': 4752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:14.395111', 'step': 4752, 'epoch': 1} {'type': 'loss', 'content': 0.25030025839805603, 'timestamp': '2025-09-10 02:39:14.397815', 'step': 4753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:14.457015', 'step': 4753, 'epoch': 1} {'type': 'loss', 'content': 0.17505298554897308, 'timestamp': '2025-09-10 02:39:14.459319', 'step': 4754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:14.517268', 'step': 4754, 'epoch': 1} {'type': 'loss', 'content': 0.17877233028411865, 'timestamp': '2025-09-10 02:39:14.519624', 'step': 4755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:14.573372', 'step': 4755, 'epoch': 1} {'type': 'loss', 'content': 0.18456067144870758, 'timestamp': '2025-09-10 02:39:14.579495', 'step': 4756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:14.633939', 'step': 4756, 'epoch': 1} {'type': 'loss', 'content': 0.14611674845218658, 'timestamp': '2025-09-10 02:39:14.636373', 'step': 4757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:14.690944', 'step': 4757, 'epoch': 1} {'type': 'loss', 'content': 0.11560201644897461, 'timestamp': '2025-09-10 02:39:14.693354', 'step': 4758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:14.746439', 'step': 4758, 'epoch': 1} {'type': 'loss', 'content': 0.16711018979549408, 'timestamp': '2025-09-10 02:39:14.748630', 'step': 4759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:14.804950', 'step': 4759, 'epoch': 1} {'type': 'loss', 'content': 0.16539575159549713, 'timestamp': '2025-09-10 02:39:14.810700', 'step': 4760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:14.864451', 'step': 4760, 'epoch': 1} {'type': 'loss', 'content': 0.21276043355464935, 'timestamp': '2025-09-10 02:39:14.866474', 'step': 4761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:14.920801', 'step': 4761, 'epoch': 1} {'type': 'loss', 'content': 0.08703906834125519, 'timestamp': '2025-09-10 02:39:14.922943', 'step': 4762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:14.977247', 'step': 4762, 'epoch': 1} {'type': 'loss', 'content': 0.06795945018529892, 'timestamp': '2025-09-10 02:39:14.979501', 'step': 4763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:15.033471', 'step': 4763, 'epoch': 1} {'type': 'loss', 'content': 0.13787750899791718, 'timestamp': '2025-09-10 02:39:15.039545', 'step': 4764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:15.092961', 'step': 4764, 'epoch': 1} {'type': 'loss', 'content': 0.13551649451255798, 'timestamp': '2025-09-10 02:39:15.095121', 'step': 4765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:15.148135', 'step': 4765, 'epoch': 1} {'type': 'loss', 'content': 0.3040623366832733, 'timestamp': '2025-09-10 02:39:15.150204', 'step': 4766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:15.203560', 'step': 4766, 'epoch': 1} {'type': 'loss', 'content': 0.2545526921749115, 'timestamp': '2025-09-10 02:39:15.205612', 'step': 4767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:15.258775', 'step': 4767, 'epoch': 1} {'type': 'loss', 'content': 0.21459972858428955, 'timestamp': '2025-09-10 02:39:15.264779', 'step': 4768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:15.318138', 'step': 4768, 'epoch': 1} {'type': 'loss', 'content': 0.14130039513111115, 'timestamp': '2025-09-10 02:39:15.320397', 'step': 4769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:15.373247', 'step': 4769, 'epoch': 1} {'type': 'loss', 'content': 0.2196035236120224, 'timestamp': '2025-09-10 02:39:15.375668', 'step': 4770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:15.429442', 'step': 4770, 'epoch': 1} {'type': 'loss', 'content': 0.13163141906261444, 'timestamp': '2025-09-10 02:39:15.431923', 'step': 4771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:15.486984', 'step': 4771, 'epoch': 1} {'type': 'loss', 'content': 0.13071638345718384, 'timestamp': '2025-09-10 02:39:15.492943', 'step': 4772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:15.546815', 'step': 4772, 'epoch': 1} {'type': 'loss', 'content': 0.18120242655277252, 'timestamp': '2025-09-10 02:39:15.549571', 'step': 4773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:15.602420', 'step': 4773, 'epoch': 1} {'type': 'loss', 'content': 0.1854296773672104, 'timestamp': '2025-09-10 02:39:15.604686', 'step': 4774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:15.657265', 'step': 4774, 'epoch': 1} {'type': 'loss', 'content': 0.19801323115825653, 'timestamp': '2025-09-10 02:39:15.659538', 'step': 4775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:15.712336', 'step': 4775, 'epoch': 1} {'type': 'loss', 'content': 0.08619940280914307, 'timestamp': '2025-09-10 02:39:15.718306', 'step': 4776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:15.770870', 'step': 4776, 'epoch': 1} {'type': 'loss', 'content': 0.13601714372634888, 'timestamp': '2025-09-10 02:39:15.772930', 'step': 4777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:15.826241', 'step': 4777, 'epoch': 1} {'type': 'loss', 'content': 0.12254098057746887, 'timestamp': '2025-09-10 02:39:15.828544', 'step': 4778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:15.883385', 'step': 4778, 'epoch': 1} {'type': 'loss', 'content': 0.14364078640937805, 'timestamp': '2025-09-10 02:39:15.885699', 'step': 4779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:15.939109', 'step': 4779, 'epoch': 1} {'type': 'loss', 'content': 0.1722804456949234, 'timestamp': '2025-09-10 02:39:15.945132', 'step': 4780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:15.998382', 'step': 4780, 'epoch': 1} {'type': 'loss', 'content': 0.26806554198265076, 'timestamp': '2025-09-10 02:39:16.000446', 'step': 4781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:16.054215', 'step': 4781, 'epoch': 1} {'type': 'loss', 'content': 0.24556592106819153, 'timestamp': '2025-09-10 02:39:16.056364', 'step': 4782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:16.110108', 'step': 4782, 'epoch': 1} {'type': 'loss', 'content': 0.1080947294831276, 'timestamp': '2025-09-10 02:39:16.112384', 'step': 4783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:16.165629', 'step': 4783, 'epoch': 1} {'type': 'loss', 'content': 0.23985350131988525, 'timestamp': '2025-09-10 02:39:16.172070', 'step': 4784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:16.225544', 'step': 4784, 'epoch': 1} {'type': 'loss', 'content': 0.12428945302963257, 'timestamp': '2025-09-10 02:39:16.227735', 'step': 4785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:16.281594', 'step': 4785, 'epoch': 1} {'type': 'loss', 'content': 0.19211514294147491, 'timestamp': '2025-09-10 02:39:16.284197', 'step': 4786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:16.340022', 'step': 4786, 'epoch': 1} {'type': 'loss', 'content': 0.1300203800201416, 'timestamp': '2025-09-10 02:39:16.342696', 'step': 4787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:16.395841', 'step': 4787, 'epoch': 1} {'type': 'loss', 'content': 0.12292203307151794, 'timestamp': '2025-09-10 02:39:16.402056', 'step': 4788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:16.454765', 'step': 4788, 'epoch': 1} {'type': 'loss', 'content': 0.13794656097888947, 'timestamp': '2025-09-10 02:39:16.456983', 'step': 4789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:16.510998', 'step': 4789, 'epoch': 1} {'type': 'loss', 'content': 0.1574012190103531, 'timestamp': '2025-09-10 02:39:16.513386', 'step': 4790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:16.567940', 'step': 4790, 'epoch': 1} {'type': 'loss', 'content': 0.15228532254695892, 'timestamp': '2025-09-10 02:39:16.570792', 'step': 4791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:16.624198', 'step': 4791, 'epoch': 1} {'type': 'loss', 'content': 0.2594553530216217, 'timestamp': '2025-09-10 02:39:16.630202', 'step': 4792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:16.685373', 'step': 4792, 'epoch': 1} {'type': 'loss', 'content': 0.27632418274879456, 'timestamp': '2025-09-10 02:39:16.687441', 'step': 4793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:16.741433', 'step': 4793, 'epoch': 1} {'type': 'loss', 'content': 0.1825508326292038, 'timestamp': '2025-09-10 02:39:16.743471', 'step': 4794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:16.798562', 'step': 4794, 'epoch': 1} {'type': 'loss', 'content': 0.19647115468978882, 'timestamp': '2025-09-10 02:39:16.800730', 'step': 4795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:16.854524', 'step': 4795, 'epoch': 1} {'type': 'loss', 'content': 0.1236797347664833, 'timestamp': '2025-09-10 02:39:16.860795', 'step': 4796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:16.913020', 'step': 4796, 'epoch': 1} {'type': 'loss', 'content': 0.16512854397296906, 'timestamp': '2025-09-10 02:39:16.915002', 'step': 4797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:16.968192', 'step': 4797, 'epoch': 1} {'type': 'loss', 'content': 0.14030241966247559, 'timestamp': '2025-09-10 02:39:16.970424', 'step': 4798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:17.025813', 'step': 4798, 'epoch': 1} {'type': 'loss', 'content': 0.15468810498714447, 'timestamp': '2025-09-10 02:39:17.027960', 'step': 4799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:17.082205', 'step': 4799, 'epoch': 1} {'type': 'loss', 'content': 0.16068235039710999, 'timestamp': '2025-09-10 02:39:17.088330', 'step': 4800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:17.141380', 'step': 4800, 'epoch': 1} {'type': 'loss', 'content': 0.1372949331998825, 'timestamp': '2025-09-10 02:39:17.143612', 'step': 4801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:17.197712', 'step': 4801, 'epoch': 1} {'type': 'loss', 'content': 0.17300625145435333, 'timestamp': '2025-09-10 02:39:17.199771', 'step': 4802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:17.255844', 'step': 4802, 'epoch': 1} {'type': 'loss', 'content': 0.16159726679325104, 'timestamp': '2025-09-10 02:39:17.258045', 'step': 4803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:17.310975', 'step': 4803, 'epoch': 1} {'type': 'loss', 'content': 0.19368533790111542, 'timestamp': '2025-09-10 02:39:17.316950', 'step': 4804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:17.369695', 'step': 4804, 'epoch': 1} {'type': 'loss', 'content': 0.28966811299324036, 'timestamp': '2025-09-10 02:39:17.372210', 'step': 4805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:17.425490', 'step': 4805, 'epoch': 1} {'type': 'loss', 'content': 0.16785100102424622, 'timestamp': '2025-09-10 02:39:17.427738', 'step': 4806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:17.481141', 'step': 4806, 'epoch': 1} {'type': 'loss', 'content': 0.08808635175228119, 'timestamp': '2025-09-10 02:39:17.483408', 'step': 4807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:17.537033', 'step': 4807, 'epoch': 1} {'type': 'loss', 'content': 0.1618451178073883, 'timestamp': '2025-09-10 02:39:17.543360', 'step': 4808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:17.596840', 'step': 4808, 'epoch': 1} {'type': 'loss', 'content': 0.17857222259044647, 'timestamp': '2025-09-10 02:39:17.599061', 'step': 4809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:17.653534', 'step': 4809, 'epoch': 1} {'type': 'loss', 'content': 0.18358179926872253, 'timestamp': '2025-09-10 02:39:17.655710', 'step': 4810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:17.710135', 'step': 4810, 'epoch': 1} {'type': 'loss', 'content': 0.1145152598619461, 'timestamp': '2025-09-10 02:39:17.712090', 'step': 4811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:17.767118', 'step': 4811, 'epoch': 1} {'type': 'loss', 'content': 0.1785217672586441, 'timestamp': '2025-09-10 02:39:17.773289', 'step': 4812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:17.828827', 'step': 4812, 'epoch': 1} {'type': 'loss', 'content': 0.2581697702407837, 'timestamp': '2025-09-10 02:39:17.831205', 'step': 4813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:17.886001', 'step': 4813, 'epoch': 1} {'type': 'loss', 'content': 0.24671389162540436, 'timestamp': '2025-09-10 02:39:17.888550', 'step': 4814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:17.942970', 'step': 4814, 'epoch': 1} {'type': 'loss', 'content': 0.11189625412225723, 'timestamp': '2025-09-10 02:39:17.945125', 'step': 4815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:17.998766', 'step': 4815, 'epoch': 1} {'type': 'loss', 'content': 0.17282062768936157, 'timestamp': '2025-09-10 02:39:18.004776', 'step': 4816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:18.057274', 'step': 4816, 'epoch': 1} {'type': 'loss', 'content': 0.16849935054779053, 'timestamp': '2025-09-10 02:39:18.059648', 'step': 4817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:18.113016', 'step': 4817, 'epoch': 1} {'type': 'loss', 'content': 0.12953002750873566, 'timestamp': '2025-09-10 02:39:18.115124', 'step': 4818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:18.167628', 'step': 4818, 'epoch': 1} {'type': 'loss', 'content': 0.09925445169210434, 'timestamp': '2025-09-10 02:39:18.169788', 'step': 4819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:18.223511', 'step': 4819, 'epoch': 1} {'type': 'loss', 'content': 0.16058878600597382, 'timestamp': '2025-09-10 02:39:18.229318', 'step': 4820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:18.282359', 'step': 4820, 'epoch': 1} {'type': 'loss', 'content': 0.15922008454799652, 'timestamp': '2025-09-10 02:39:18.284398', 'step': 4821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:18.337981', 'step': 4821, 'epoch': 1} {'type': 'loss', 'content': 0.19330279529094696, 'timestamp': '2025-09-10 02:39:18.340124', 'step': 4822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:18.396115', 'step': 4822, 'epoch': 1} {'type': 'loss', 'content': 0.09916873276233673, 'timestamp': '2025-09-10 02:39:18.398193', 'step': 4823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:18.450874', 'step': 4823, 'epoch': 1} {'type': 'loss', 'content': 0.11461304128170013, 'timestamp': '2025-09-10 02:39:18.456754', 'step': 4824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:18.509803', 'step': 4824, 'epoch': 1} {'type': 'loss', 'content': 0.09575999528169632, 'timestamp': '2025-09-10 02:39:18.511820', 'step': 4825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:18.565280', 'step': 4825, 'epoch': 1} {'type': 'loss', 'content': 0.11964239925146103, 'timestamp': '2025-09-10 02:39:18.567505', 'step': 4826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:18.622234', 'step': 4826, 'epoch': 1} {'type': 'loss', 'content': 0.09774933755397797, 'timestamp': '2025-09-10 02:39:18.624520', 'step': 4827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:18.677017', 'step': 4827, 'epoch': 1} {'type': 'loss', 'content': 0.12063034623861313, 'timestamp': '2025-09-10 02:39:18.683321', 'step': 4828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:18.735724', 'step': 4828, 'epoch': 1} {'type': 'loss', 'content': 0.16379575431346893, 'timestamp': '2025-09-10 02:39:18.738013', 'step': 4829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:18.791817', 'step': 4829, 'epoch': 1} {'type': 'loss', 'content': 0.15567782521247864, 'timestamp': '2025-09-10 02:39:18.794248', 'step': 4830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:18.849455', 'step': 4830, 'epoch': 1} {'type': 'loss', 'content': 0.13857263326644897, 'timestamp': '2025-09-10 02:39:18.851473', 'step': 4831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:18.905927', 'step': 4831, 'epoch': 1} {'type': 'loss', 'content': 0.19584962725639343, 'timestamp': '2025-09-10 02:39:18.912096', 'step': 4832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:18.965642', 'step': 4832, 'epoch': 1} {'type': 'loss', 'content': 0.11507715284824371, 'timestamp': '2025-09-10 02:39:18.967807', 'step': 4833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:19.020622', 'step': 4833, 'epoch': 1} {'type': 'loss', 'content': 0.16814757883548737, 'timestamp': '2025-09-10 02:39:19.022951', 'step': 4834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:19.075813', 'step': 4834, 'epoch': 1} {'type': 'loss', 'content': 0.11212871968746185, 'timestamp': '2025-09-10 02:39:19.077850', 'step': 4835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:19.130749', 'step': 4835, 'epoch': 1} {'type': 'loss', 'content': 0.13430067896842957, 'timestamp': '2025-09-10 02:39:19.136696', 'step': 4836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:19.189769', 'step': 4836, 'epoch': 1} {'type': 'loss', 'content': 0.07911831885576248, 'timestamp': '2025-09-10 02:39:19.192080', 'step': 4837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:19.245545', 'step': 4837, 'epoch': 1} {'type': 'loss', 'content': 0.19239063560962677, 'timestamp': '2025-09-10 02:39:19.247570', 'step': 4838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:19.300523', 'step': 4838, 'epoch': 1} {'type': 'loss', 'content': 0.24847039580345154, 'timestamp': '2025-09-10 02:39:19.302676', 'step': 4839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:19.356199', 'step': 4839, 'epoch': 1} {'type': 'loss', 'content': 0.13530531525611877, 'timestamp': '2025-09-10 02:39:19.362345', 'step': 4840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:19.415620', 'step': 4840, 'epoch': 1} {'type': 'loss', 'content': 0.12454329431056976, 'timestamp': '2025-09-10 02:39:19.417809', 'step': 4841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:19.472260', 'step': 4841, 'epoch': 1} {'type': 'loss', 'content': 0.15138138830661774, 'timestamp': '2025-09-10 02:39:19.474553', 'step': 4842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:19.529059', 'step': 4842, 'epoch': 1} {'type': 'loss', 'content': 0.23479759693145752, 'timestamp': '2025-09-10 02:39:19.531423', 'step': 4843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:19.584315', 'step': 4843, 'epoch': 1} {'type': 'loss', 'content': 0.14134712517261505, 'timestamp': '2025-09-10 02:39:19.590684', 'step': 4844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:19.642981', 'step': 4844, 'epoch': 1} {'type': 'loss', 'content': 0.14304253458976746, 'timestamp': '2025-09-10 02:39:19.645201', 'step': 4845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:19.698490', 'step': 4845, 'epoch': 1} {'type': 'loss', 'content': 0.07431288063526154, 'timestamp': '2025-09-10 02:39:19.700453', 'step': 4846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:19.753671', 'step': 4846, 'epoch': 1} {'type': 'loss', 'content': 0.16263329982757568, 'timestamp': '2025-09-10 02:39:19.757146', 'step': 4847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:19.813219', 'step': 4847, 'epoch': 1} {'type': 'loss', 'content': 0.16515226662158966, 'timestamp': '2025-09-10 02:39:19.819115', 'step': 4848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:19.873152', 'step': 4848, 'epoch': 1} {'type': 'loss', 'content': 0.28199276328086853, 'timestamp': '2025-09-10 02:39:19.875314', 'step': 4849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:19.929065', 'step': 4849, 'epoch': 1} {'type': 'loss', 'content': 0.12892258167266846, 'timestamp': '2025-09-10 02:39:19.931051', 'step': 4850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:19.984916', 'step': 4850, 'epoch': 1} {'type': 'loss', 'content': 0.1273316890001297, 'timestamp': '2025-09-10 02:39:19.987187', 'step': 4851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:20.041385', 'step': 4851, 'epoch': 1} {'type': 'loss', 'content': 0.191279336810112, 'timestamp': '2025-09-10 02:39:20.047387', 'step': 4852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:20.099615', 'step': 4852, 'epoch': 1} {'type': 'loss', 'content': 0.1349015235900879, 'timestamp': '2025-09-10 02:39:20.101645', 'step': 4853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:20.155773', 'step': 4853, 'epoch': 1} {'type': 'loss', 'content': 0.140501469373703, 'timestamp': '2025-09-10 02:39:20.157854', 'step': 4854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:20.210955', 'step': 4854, 'epoch': 1} {'type': 'loss', 'content': 0.18442344665527344, 'timestamp': '2025-09-10 02:39:20.213085', 'step': 4855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:20.265791', 'step': 4855, 'epoch': 1} {'type': 'loss', 'content': 0.1438162624835968, 'timestamp': '2025-09-10 02:39:20.271669', 'step': 4856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:20.324312', 'step': 4856, 'epoch': 1} {'type': 'loss', 'content': 0.0765123963356018, 'timestamp': '2025-09-10 02:39:20.326550', 'step': 4857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:20.379649', 'step': 4857, 'epoch': 1} {'type': 'loss', 'content': 0.3568733334541321, 'timestamp': '2025-09-10 02:39:20.381862', 'step': 4858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:20.435763', 'step': 4858, 'epoch': 1} {'type': 'loss', 'content': 0.2232479751110077, 'timestamp': '2025-09-10 02:39:20.437758', 'step': 4859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:20.491184', 'step': 4859, 'epoch': 1} {'type': 'loss', 'content': 0.13135918974876404, 'timestamp': '2025-09-10 02:39:20.497194', 'step': 4860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:20.550309', 'step': 4860, 'epoch': 1} {'type': 'loss', 'content': 0.22971965372562408, 'timestamp': '2025-09-10 02:39:20.552285', 'step': 4861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:20.607211', 'step': 4861, 'epoch': 1} {'type': 'loss', 'content': 0.15068387985229492, 'timestamp': '2025-09-10 02:39:20.609225', 'step': 4862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:20.661938', 'step': 4862, 'epoch': 1} {'type': 'loss', 'content': 0.28779536485671997, 'timestamp': '2025-09-10 02:39:20.664057', 'step': 4863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:20.716790', 'step': 4863, 'epoch': 1} {'type': 'loss', 'content': 0.09718265384435654, 'timestamp': '2025-09-10 02:39:20.722708', 'step': 4864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:20.775604', 'step': 4864, 'epoch': 1} {'type': 'loss', 'content': 0.13556154072284698, 'timestamp': '2025-09-10 02:39:20.777640', 'step': 4865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:20.830889', 'step': 4865, 'epoch': 1} {'type': 'loss', 'content': 0.22614911198616028, 'timestamp': '2025-09-10 02:39:20.832927', 'step': 4866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:20.885922', 'step': 4866, 'epoch': 1} {'type': 'loss', 'content': 0.19000278413295746, 'timestamp': '2025-09-10 02:39:20.887955', 'step': 4867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:20.941230', 'step': 4867, 'epoch': 1} {'type': 'loss', 'content': 0.20175166428089142, 'timestamp': '2025-09-10 02:39:20.947094', 'step': 4868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:21.000386', 'step': 4868, 'epoch': 1} {'type': 'loss', 'content': 0.2743090093135834, 'timestamp': '2025-09-10 02:39:21.002631', 'step': 4869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:21.057468', 'step': 4869, 'epoch': 1} {'type': 'loss', 'content': 0.20387014746665955, 'timestamp': '2025-09-10 02:39:21.059684', 'step': 4870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:21.112872', 'step': 4870, 'epoch': 1} {'type': 'loss', 'content': 0.1654277890920639, 'timestamp': '2025-09-10 02:39:21.115229', 'step': 4871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:21.169467', 'step': 4871, 'epoch': 1} {'type': 'loss', 'content': 0.1259179413318634, 'timestamp': '2025-09-10 02:39:21.177705', 'step': 4872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:21.230797', 'step': 4872, 'epoch': 1} {'type': 'loss', 'content': 0.12643271684646606, 'timestamp': '2025-09-10 02:39:21.233122', 'step': 4873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:21.287157', 'step': 4873, 'epoch': 1} {'type': 'loss', 'content': 0.19527903199195862, 'timestamp': '2025-09-10 02:39:21.293313', 'step': 4874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:21.355534', 'step': 4874, 'epoch': 1} {'type': 'loss', 'content': 0.16475573182106018, 'timestamp': '2025-09-10 02:39:21.364044', 'step': 4875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:21.417630', 'step': 4875, 'epoch': 1} {'type': 'loss', 'content': 0.14430198073387146, 'timestamp': '2025-09-10 02:39:21.423749', 'step': 4876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:21.476301', 'step': 4876, 'epoch': 1} {'type': 'loss', 'content': 0.18963515758514404, 'timestamp': '2025-09-10 02:39:21.478615', 'step': 4877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:21.533452', 'step': 4877, 'epoch': 1} {'type': 'loss', 'content': 0.10574576258659363, 'timestamp': '2025-09-10 02:39:21.535662', 'step': 4878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:21.590728', 'step': 4878, 'epoch': 1} {'type': 'loss', 'content': 0.23512934148311615, 'timestamp': '2025-09-10 02:39:21.593032', 'step': 4879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:21.647634', 'step': 4879, 'epoch': 1} {'type': 'loss', 'content': 0.13251946866512299, 'timestamp': '2025-09-10 02:39:21.653787', 'step': 4880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:21.706424', 'step': 4880, 'epoch': 1} {'type': 'loss', 'content': 0.2143847942352295, 'timestamp': '2025-09-10 02:39:21.708650', 'step': 4881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:21.762821', 'step': 4881, 'epoch': 1} {'type': 'loss', 'content': 0.1984947919845581, 'timestamp': '2025-09-10 02:39:21.765053', 'step': 4882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:21.818413', 'step': 4882, 'epoch': 1} {'type': 'loss', 'content': 0.12084509432315826, 'timestamp': '2025-09-10 02:39:21.820504', 'step': 4883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:21.873887', 'step': 4883, 'epoch': 1} {'type': 'loss', 'content': 0.16522876918315887, 'timestamp': '2025-09-10 02:39:21.879901', 'step': 4884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:21.932318', 'step': 4884, 'epoch': 1} {'type': 'loss', 'content': 0.11670105904340744, 'timestamp': '2025-09-10 02:39:21.934462', 'step': 4885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:21.987293', 'step': 4885, 'epoch': 1} {'type': 'loss', 'content': 0.22097837924957275, 'timestamp': '2025-09-10 02:39:21.989304', 'step': 4886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:22.043073', 'step': 4886, 'epoch': 1} {'type': 'loss', 'content': 0.18768349289894104, 'timestamp': '2025-09-10 02:39:22.045395', 'step': 4887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:22.099611', 'step': 4887, 'epoch': 1} {'type': 'loss', 'content': 0.17451980710029602, 'timestamp': '2025-09-10 02:39:22.105674', 'step': 4888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:22.158977', 'step': 4888, 'epoch': 1} {'type': 'loss', 'content': 0.20994295179843903, 'timestamp': '2025-09-10 02:39:22.161185', 'step': 4889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:22.215666', 'step': 4889, 'epoch': 1} {'type': 'loss', 'content': 0.20259712636470795, 'timestamp': '2025-09-10 02:39:22.217930', 'step': 4890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:22.270863', 'step': 4890, 'epoch': 1} {'type': 'loss', 'content': 0.1646290123462677, 'timestamp': '2025-09-10 02:39:22.272955', 'step': 4891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:22.325849', 'step': 4891, 'epoch': 1} {'type': 'loss', 'content': 0.11238529533147812, 'timestamp': '2025-09-10 02:39:22.331887', 'step': 4892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:22.384178', 'step': 4892, 'epoch': 1} {'type': 'loss', 'content': 0.23177282512187958, 'timestamp': '2025-09-10 02:39:22.386235', 'step': 4893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:22.438799', 'step': 4893, 'epoch': 1} {'type': 'loss', 'content': 0.19637489318847656, 'timestamp': '2025-09-10 02:39:22.440823', 'step': 4894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:22.494422', 'step': 4894, 'epoch': 1} {'type': 'loss', 'content': 0.25926101207733154, 'timestamp': '2025-09-10 02:39:22.496885', 'step': 4895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:22.550245', 'step': 4895, 'epoch': 1} {'type': 'loss', 'content': 0.12212998420000076, 'timestamp': '2025-09-10 02:39:22.557439', 'step': 4896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:22.611440', 'step': 4896, 'epoch': 1} {'type': 'loss', 'content': 0.17013898491859436, 'timestamp': '2025-09-10 02:39:22.613432', 'step': 4897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:22.667772', 'step': 4897, 'epoch': 1} {'type': 'loss', 'content': 0.09351448714733124, 'timestamp': '2025-09-10 02:39:22.670043', 'step': 4898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:22.724006', 'step': 4898, 'epoch': 1} {'type': 'loss', 'content': 0.18889883160591125, 'timestamp': '2025-09-10 02:39:22.726322', 'step': 4899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:22.779220', 'step': 4899, 'epoch': 1} {'type': 'loss', 'content': 0.20525433123111725, 'timestamp': '2025-09-10 02:39:22.793153', 'step': 4900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:22.869025', 'step': 4900, 'epoch': 1} {'type': 'loss', 'content': 0.13015788793563843, 'timestamp': '2025-09-10 02:39:22.871199', 'step': 4901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:22.931668', 'step': 4901, 'epoch': 1} {'type': 'loss', 'content': 0.17122294008731842, 'timestamp': '2025-09-10 02:39:22.934044', 'step': 4902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:22.987925', 'step': 4902, 'epoch': 1} {'type': 'loss', 'content': 0.16220249235630035, 'timestamp': '2025-09-10 02:39:22.992847', 'step': 4903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:23.052355', 'step': 4903, 'epoch': 1} {'type': 'loss', 'content': 0.18162113428115845, 'timestamp': '2025-09-10 02:39:23.058353', 'step': 4904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:23.117734', 'step': 4904, 'epoch': 1} {'type': 'loss', 'content': 0.15679526329040527, 'timestamp': '2025-09-10 02:39:23.128555', 'step': 4905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:23.188331', 'step': 4905, 'epoch': 1} {'type': 'loss', 'content': 0.1617536097764969, 'timestamp': '2025-09-10 02:39:23.190560', 'step': 4906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:23.248577', 'step': 4906, 'epoch': 1} {'type': 'loss', 'content': 0.14260780811309814, 'timestamp': '2025-09-10 02:39:23.250660', 'step': 4907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:23.304181', 'step': 4907, 'epoch': 1} {'type': 'loss', 'content': 0.1508128046989441, 'timestamp': '2025-09-10 02:39:23.310166', 'step': 4908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:23.362459', 'step': 4908, 'epoch': 1} {'type': 'loss', 'content': 0.13990351557731628, 'timestamp': '2025-09-10 02:39:23.364715', 'step': 4909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:23.417631', 'step': 4909, 'epoch': 1} {'type': 'loss', 'content': 0.09501957893371582, 'timestamp': '2025-09-10 02:39:23.420111', 'step': 4910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:23.474291', 'step': 4910, 'epoch': 1} {'type': 'loss', 'content': 0.22486577928066254, 'timestamp': '2025-09-10 02:39:23.476420', 'step': 4911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:23.533563', 'step': 4911, 'epoch': 1} {'type': 'loss', 'content': 0.1966400295495987, 'timestamp': '2025-09-10 02:39:23.539863', 'step': 4912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:23.592064', 'step': 4912, 'epoch': 1} {'type': 'loss', 'content': 0.14126072824001312, 'timestamp': '2025-09-10 02:39:23.594235', 'step': 4913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:23.647729', 'step': 4913, 'epoch': 1} {'type': 'loss', 'content': 0.17437878251075745, 'timestamp': '2025-09-10 02:39:23.650004', 'step': 4914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:23.704165', 'step': 4914, 'epoch': 1} {'type': 'loss', 'content': 0.20080392062664032, 'timestamp': '2025-09-10 02:39:23.706676', 'step': 4915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:23.762630', 'step': 4915, 'epoch': 1} {'type': 'loss', 'content': 0.13105204701423645, 'timestamp': '2025-09-10 02:39:23.768934', 'step': 4916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:23.824468', 'step': 4916, 'epoch': 1} {'type': 'loss', 'content': 0.17098650336265564, 'timestamp': '2025-09-10 02:39:23.826613', 'step': 4917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:23.880685', 'step': 4917, 'epoch': 1} {'type': 'loss', 'content': 0.12191460281610489, 'timestamp': '2025-09-10 02:39:23.885113', 'step': 4918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:23.938863', 'step': 4918, 'epoch': 1} {'type': 'loss', 'content': 0.14184272289276123, 'timestamp': '2025-09-10 02:39:23.940842', 'step': 4919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:23.994075', 'step': 4919, 'epoch': 1} {'type': 'loss', 'content': 0.1305251121520996, 'timestamp': '2025-09-10 02:39:24.000134', 'step': 4920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:24.059665', 'step': 4920, 'epoch': 1} {'type': 'loss', 'content': 0.17865809798240662, 'timestamp': '2025-09-10 02:39:24.061933', 'step': 4921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:24.117158', 'step': 4921, 'epoch': 1} {'type': 'loss', 'content': 0.10529020428657532, 'timestamp': '2025-09-10 02:39:24.119121', 'step': 4922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:24.173094', 'step': 4922, 'epoch': 1} {'type': 'loss', 'content': 0.18241742253303528, 'timestamp': '2025-09-10 02:39:24.176897', 'step': 4923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:24.236962', 'step': 4923, 'epoch': 1} {'type': 'loss', 'content': 0.12498676031827927, 'timestamp': '2025-09-10 02:39:24.244977', 'step': 4924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:24.303898', 'step': 4924, 'epoch': 1} {'type': 'loss', 'content': 0.14202047884464264, 'timestamp': '2025-09-10 02:39:24.306156', 'step': 4925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:24.361256', 'step': 4925, 'epoch': 1} {'type': 'loss', 'content': 0.15716679394245148, 'timestamp': '2025-09-10 02:39:24.363918', 'step': 4926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:24.418298', 'step': 4926, 'epoch': 1} {'type': 'loss', 'content': 0.27241936326026917, 'timestamp': '2025-09-10 02:39:24.420359', 'step': 4927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:24.473121', 'step': 4927, 'epoch': 1} {'type': 'loss', 'content': 0.16845703125, 'timestamp': '2025-09-10 02:39:24.479280', 'step': 4928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:24.532497', 'step': 4928, 'epoch': 1} {'type': 'loss', 'content': 0.19361121952533722, 'timestamp': '2025-09-10 02:39:24.534778', 'step': 4929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:24.589357', 'step': 4929, 'epoch': 1} {'type': 'loss', 'content': 0.13076922297477722, 'timestamp': '2025-09-10 02:39:24.591824', 'step': 4930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:24.645312', 'step': 4930, 'epoch': 1} {'type': 'loss', 'content': 0.1304277777671814, 'timestamp': '2025-09-10 02:39:24.647295', 'step': 4931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:24.701672', 'step': 4931, 'epoch': 1} {'type': 'loss', 'content': 0.13756921887397766, 'timestamp': '2025-09-10 02:39:24.710316', 'step': 4932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:24.769391', 'step': 4932, 'epoch': 1} {'type': 'loss', 'content': 0.09647893160581589, 'timestamp': '2025-09-10 02:39:24.771832', 'step': 4933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:24.826225', 'step': 4933, 'epoch': 1} {'type': 'loss', 'content': 0.16838276386260986, 'timestamp': '2025-09-10 02:39:24.828315', 'step': 4934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:24.883683', 'step': 4934, 'epoch': 1} {'type': 'loss', 'content': 0.15335427224636078, 'timestamp': '2025-09-10 02:39:24.887528', 'step': 4935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:24.940638', 'step': 4935, 'epoch': 1} {'type': 'loss', 'content': 0.2018536925315857, 'timestamp': '2025-09-10 02:39:24.946499', 'step': 4936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:25.003312', 'step': 4936, 'epoch': 1} {'type': 'loss', 'content': 0.1701631098985672, 'timestamp': '2025-09-10 02:39:25.005645', 'step': 4937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:25.059443', 'step': 4937, 'epoch': 1} {'type': 'loss', 'content': 0.15051157772541046, 'timestamp': '2025-09-10 02:39:25.065611', 'step': 4938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:25.118501', 'step': 4938, 'epoch': 1} {'type': 'loss', 'content': 0.1783994436264038, 'timestamp': '2025-09-10 02:39:25.129712', 'step': 4939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:25.186076', 'step': 4939, 'epoch': 1} {'type': 'loss', 'content': 0.1056092381477356, 'timestamp': '2025-09-10 02:39:25.191951', 'step': 4940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:25.245423', 'step': 4940, 'epoch': 1} {'type': 'loss', 'content': 0.13213779032230377, 'timestamp': '2025-09-10 02:39:25.247488', 'step': 4941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:25.301871', 'step': 4941, 'epoch': 1} {'type': 'loss', 'content': 0.24019505083560944, 'timestamp': '2025-09-10 02:39:25.303666', 'step': 4942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:25.357323', 'step': 4942, 'epoch': 1} {'type': 'loss', 'content': 0.13314327597618103, 'timestamp': '2025-09-10 02:39:25.359531', 'step': 4943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:25.421899', 'step': 4943, 'epoch': 1} {'type': 'loss', 'content': 0.28502124547958374, 'timestamp': '2025-09-10 02:39:25.437284', 'step': 4944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:25.490464', 'step': 4944, 'epoch': 1} {'type': 'loss', 'content': 0.15219202637672424, 'timestamp': '2025-09-10 02:39:25.492349', 'step': 4945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:25.545958', 'step': 4945, 'epoch': 1} {'type': 'loss', 'content': 0.3317880630493164, 'timestamp': '2025-09-10 02:39:25.548004', 'step': 4946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:25.601443', 'step': 4946, 'epoch': 1} {'type': 'loss', 'content': 0.2076040655374527, 'timestamp': '2025-09-10 02:39:25.614145', 'step': 4947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:25.674401', 'step': 4947, 'epoch': 1} {'type': 'loss', 'content': 0.16532857716083527, 'timestamp': '2025-09-10 02:39:25.680329', 'step': 4948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:25.732935', 'step': 4948, 'epoch': 1} {'type': 'loss', 'content': 0.18277239799499512, 'timestamp': '2025-09-10 02:39:25.734826', 'step': 4949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:25.789050', 'step': 4949, 'epoch': 1} {'type': 'loss', 'content': 0.10618085414171219, 'timestamp': '2025-09-10 02:39:25.790948', 'step': 4950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:25.849866', 'step': 4950, 'epoch': 1} {'type': 'loss', 'content': 0.14321453869342804, 'timestamp': '2025-09-10 02:39:25.851822', 'step': 4951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:25.905130', 'step': 4951, 'epoch': 1} {'type': 'loss', 'content': 0.16601619124412537, 'timestamp': '2025-09-10 02:39:25.910930', 'step': 4952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:25.964166', 'step': 4952, 'epoch': 1} {'type': 'loss', 'content': 0.15759839117527008, 'timestamp': '2025-09-10 02:39:25.966445', 'step': 4953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:26.020167', 'step': 4953, 'epoch': 1} {'type': 'loss', 'content': 0.14470358192920685, 'timestamp': '2025-09-10 02:39:26.022268', 'step': 4954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:26.074954', 'step': 4954, 'epoch': 1} {'type': 'loss', 'content': 0.1625036746263504, 'timestamp': '2025-09-10 02:39:26.077001', 'step': 4955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:26.129693', 'step': 4955, 'epoch': 1} {'type': 'loss', 'content': 0.27332696318626404, 'timestamp': '2025-09-10 02:39:26.135572', 'step': 4956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:26.188252', 'step': 4956, 'epoch': 1} {'type': 'loss', 'content': 0.12090334296226501, 'timestamp': '2025-09-10 02:39:26.190306', 'step': 4957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:26.243619', 'step': 4957, 'epoch': 1} {'type': 'loss', 'content': 0.16216091811656952, 'timestamp': '2025-09-10 02:39:26.245578', 'step': 4958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:26.300203', 'step': 4958, 'epoch': 1} {'type': 'loss', 'content': 0.20651943981647491, 'timestamp': '2025-09-10 02:39:26.302307', 'step': 4959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:26.358671', 'step': 4959, 'epoch': 1} {'type': 'loss', 'content': 0.14537790417671204, 'timestamp': '2025-09-10 02:39:26.364942', 'step': 4960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:26.419880', 'step': 4960, 'epoch': 1} {'type': 'loss', 'content': 0.14142683148384094, 'timestamp': '2025-09-10 02:39:26.422290', 'step': 4961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:26.475979', 'step': 4961, 'epoch': 1} {'type': 'loss', 'content': 0.19083666801452637, 'timestamp': '2025-09-10 02:39:26.478233', 'step': 4962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:26.532261', 'step': 4962, 'epoch': 1} {'type': 'loss', 'content': 0.1339913308620453, 'timestamp': '2025-09-10 02:39:26.534592', 'step': 4963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:26.587327', 'step': 4963, 'epoch': 1} {'type': 'loss', 'content': 0.12179207056760788, 'timestamp': '2025-09-10 02:39:26.593418', 'step': 4964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:26.645829', 'step': 4964, 'epoch': 1} {'type': 'loss', 'content': 0.1205606684088707, 'timestamp': '2025-09-10 02:39:26.647861', 'step': 4965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:26.701304', 'step': 4965, 'epoch': 1} {'type': 'loss', 'content': 0.2009212076663971, 'timestamp': '2025-09-10 02:39:26.703146', 'step': 4966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:26.756612', 'step': 4966, 'epoch': 1} {'type': 'loss', 'content': 0.187745600938797, 'timestamp': '2025-09-10 02:39:26.758639', 'step': 4967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:26.811755', 'step': 4967, 'epoch': 1} {'type': 'loss', 'content': 0.09538260847330093, 'timestamp': '2025-09-10 02:39:26.817643', 'step': 4968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:26.871376', 'step': 4968, 'epoch': 1} {'type': 'loss', 'content': 0.23651543259620667, 'timestamp': '2025-09-10 02:39:26.873769', 'step': 4969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:26.937726', 'step': 4969, 'epoch': 1} {'type': 'loss', 'content': 0.1612951159477234, 'timestamp': '2025-09-10 02:39:26.940072', 'step': 4970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:26.994002', 'step': 4970, 'epoch': 1} {'type': 'loss', 'content': 0.19686704874038696, 'timestamp': '2025-09-10 02:39:26.996308', 'step': 4971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:27.048929', 'step': 4971, 'epoch': 1} {'type': 'loss', 'content': 0.11706545948982239, 'timestamp': '2025-09-10 02:39:27.055155', 'step': 4972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:27.109582', 'step': 4972, 'epoch': 1} {'type': 'loss', 'content': 0.16039879620075226, 'timestamp': '2025-09-10 02:39:27.111989', 'step': 4973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:27.167368', 'step': 4973, 'epoch': 1} {'type': 'loss', 'content': 0.21985915303230286, 'timestamp': '2025-09-10 02:39:27.169289', 'step': 4974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:27.223151', 'step': 4974, 'epoch': 1} {'type': 'loss', 'content': 0.12424547225236893, 'timestamp': '2025-09-10 02:39:27.225016', 'step': 4975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:27.277824', 'step': 4975, 'epoch': 1} {'type': 'loss', 'content': 0.10159590095281601, 'timestamp': '2025-09-10 02:39:27.283434', 'step': 4976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:27.338597', 'step': 4976, 'epoch': 1} {'type': 'loss', 'content': 0.16520045697689056, 'timestamp': '2025-09-10 02:39:27.340645', 'step': 4977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:27.395387', 'step': 4977, 'epoch': 1} {'type': 'loss', 'content': 0.14262913167476654, 'timestamp': '2025-09-10 02:39:27.397806', 'step': 4978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:27.462033', 'step': 4978, 'epoch': 1} {'type': 'loss', 'content': 0.2711959779262543, 'timestamp': '2025-09-10 02:39:27.464431', 'step': 4979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:27.523558', 'step': 4979, 'epoch': 1} {'type': 'loss', 'content': 0.12461744993925095, 'timestamp': '2025-09-10 02:39:27.529792', 'step': 4980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:27.586254', 'step': 4980, 'epoch': 1} {'type': 'loss', 'content': 0.15795569121837616, 'timestamp': '2025-09-10 02:39:27.589565', 'step': 4981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:27.643012', 'step': 4981, 'epoch': 1} {'type': 'loss', 'content': 0.11968730390071869, 'timestamp': '2025-09-10 02:39:27.644951', 'step': 4982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:27.697720', 'step': 4982, 'epoch': 1} {'type': 'loss', 'content': 0.11681106686592102, 'timestamp': '2025-09-10 02:39:27.699770', 'step': 4983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:27.754463', 'step': 4983, 'epoch': 1} {'type': 'loss', 'content': 0.2051331102848053, 'timestamp': '2025-09-10 02:39:27.759997', 'step': 4984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:27.812007', 'step': 4984, 'epoch': 1} {'type': 'loss', 'content': 0.23431496322155, 'timestamp': '2025-09-10 02:39:27.813959', 'step': 4985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:27.867442', 'step': 4985, 'epoch': 1} {'type': 'loss', 'content': 0.1779600977897644, 'timestamp': '2025-09-10 02:39:27.869721', 'step': 4986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:27.924554', 'step': 4986, 'epoch': 1} {'type': 'loss', 'content': 0.15532651543617249, 'timestamp': '2025-09-10 02:39:27.927049', 'step': 4987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:27.981201', 'step': 4987, 'epoch': 1} {'type': 'loss', 'content': 0.11710444837808609, 'timestamp': '2025-09-10 02:39:27.987534', 'step': 4988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:28.041320', 'step': 4988, 'epoch': 1} {'type': 'loss', 'content': 0.10007775574922562, 'timestamp': '2025-09-10 02:39:28.043630', 'step': 4989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:28.096794', 'step': 4989, 'epoch': 1} {'type': 'loss', 'content': 0.1347123682498932, 'timestamp': '2025-09-10 02:39:28.099050', 'step': 4990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:28.152673', 'step': 4990, 'epoch': 1} {'type': 'loss', 'content': 0.2189008593559265, 'timestamp': '2025-09-10 02:39:28.154604', 'step': 4991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:28.207736', 'step': 4991, 'epoch': 1} {'type': 'loss', 'content': 0.21365851163864136, 'timestamp': '2025-09-10 02:39:28.213654', 'step': 4992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:28.266922', 'step': 4992, 'epoch': 1} {'type': 'loss', 'content': 0.13546593487262726, 'timestamp': '2025-09-10 02:39:28.268832', 'step': 4993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:28.322171', 'step': 4993, 'epoch': 1} {'type': 'loss', 'content': 0.25300806760787964, 'timestamp': '2025-09-10 02:39:28.324106', 'step': 4994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:28.377333', 'step': 4994, 'epoch': 1} {'type': 'loss', 'content': 0.1526520699262619, 'timestamp': '2025-09-10 02:39:28.379582', 'step': 4995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:28.432691', 'step': 4995, 'epoch': 1} {'type': 'loss', 'content': 0.15170155465602875, 'timestamp': '2025-09-10 02:39:28.438736', 'step': 4996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:28.492614', 'step': 4996, 'epoch': 1} {'type': 'loss', 'content': 0.1587303727865219, 'timestamp': '2025-09-10 02:39:28.494725', 'step': 4997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:28.551278', 'step': 4997, 'epoch': 1} {'type': 'loss', 'content': 0.16029761731624603, 'timestamp': '2025-09-10 02:39:28.553548', 'step': 4998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:28.606966', 'step': 4998, 'epoch': 1} {'type': 'loss', 'content': 0.23869068920612335, 'timestamp': '2025-09-10 02:39:28.609280', 'step': 4999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:28.663024', 'step': 4999, 'epoch': 1} {'type': 'loss', 'content': 0.1356600672006607, 'timestamp': '2025-09-10 02:39:28.668630', 'step': 5000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 5000', 'timestamp': '2025-09-10 02:39:29.026900', 'step': 5000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:29.083388', 'step': 5000, 'epoch': 1} {'type': 'loss', 'content': 0.13613460958003998, 'timestamp': '2025-09-10 02:39:29.085571', 'step': 5001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:29.140193', 'step': 5001, 'epoch': 1} {'type': 'loss', 'content': 0.12594100832939148, 'timestamp': '2025-09-10 02:39:29.143123', 'step': 5002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:29.203382', 'step': 5002, 'epoch': 1} {'type': 'loss', 'content': 0.15719054639339447, 'timestamp': '2025-09-10 02:39:29.208323', 'step': 5003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:29.263517', 'step': 5003, 'epoch': 1} {'type': 'loss', 'content': 0.14503440260887146, 'timestamp': '2025-09-10 02:39:29.269854', 'step': 5004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:29.322427', 'step': 5004, 'epoch': 1} {'type': 'loss', 'content': 0.20222114026546478, 'timestamp': '2025-09-10 02:39:29.324834', 'step': 5005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:29.378664', 'step': 5005, 'epoch': 1} {'type': 'loss', 'content': 0.20552648603916168, 'timestamp': '2025-09-10 02:39:29.380847', 'step': 5006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:29.434184', 'step': 5006, 'epoch': 1} {'type': 'loss', 'content': 0.15335118770599365, 'timestamp': '2025-09-10 02:39:29.436511', 'step': 5007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:29.490748', 'step': 5007, 'epoch': 1} {'type': 'loss', 'content': 0.16742227971553802, 'timestamp': '2025-09-10 02:39:29.497212', 'step': 5008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:29.553117', 'step': 5008, 'epoch': 1} {'type': 'loss', 'content': 0.19596602022647858, 'timestamp': '2025-09-10 02:39:29.555389', 'step': 5009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:29.609733', 'step': 5009, 'epoch': 1} {'type': 'loss', 'content': 0.1736546754837036, 'timestamp': '2025-09-10 02:39:29.612127', 'step': 5010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:29.670116', 'step': 5010, 'epoch': 1} {'type': 'loss', 'content': 0.2142670899629593, 'timestamp': '2025-09-10 02:39:29.672512', 'step': 5011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:29.725881', 'step': 5011, 'epoch': 1} {'type': 'loss', 'content': 0.1723770797252655, 'timestamp': '2025-09-10 02:39:29.732377', 'step': 5012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:29.786322', 'step': 5012, 'epoch': 1} {'type': 'loss', 'content': 0.1606404334306717, 'timestamp': '2025-09-10 02:39:29.788719', 'step': 5013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:29.845322', 'step': 5013, 'epoch': 1} {'type': 'loss', 'content': 0.1549500972032547, 'timestamp': '2025-09-10 02:39:29.847747', 'step': 5014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:29.901397', 'step': 5014, 'epoch': 1} {'type': 'loss', 'content': 0.11035330593585968, 'timestamp': '2025-09-10 02:39:29.906328', 'step': 5015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:29.963450', 'step': 5015, 'epoch': 1} {'type': 'loss', 'content': 0.14281065762043, 'timestamp': '2025-09-10 02:39:29.969957', 'step': 5016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:30.022506', 'step': 5016, 'epoch': 1} {'type': 'loss', 'content': 0.23382455110549927, 'timestamp': '2025-09-10 02:39:30.024900', 'step': 5017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:30.083336', 'step': 5017, 'epoch': 1} {'type': 'loss', 'content': 0.19338999688625336, 'timestamp': '2025-09-10 02:39:30.086617', 'step': 5018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:30.143582', 'step': 5018, 'epoch': 1} {'type': 'loss', 'content': 0.13710977137088776, 'timestamp': '2025-09-10 02:39:30.145952', 'step': 5019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:30.200203', 'step': 5019, 'epoch': 1} {'type': 'loss', 'content': 0.1876181811094284, 'timestamp': '2025-09-10 02:39:30.206639', 'step': 5020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:30.259490', 'step': 5020, 'epoch': 1} {'type': 'loss', 'content': 0.10209406167268753, 'timestamp': '2025-09-10 02:39:30.261700', 'step': 5021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:30.317852', 'step': 5021, 'epoch': 1} {'type': 'loss', 'content': 0.15556204319000244, 'timestamp': '2025-09-10 02:39:30.320047', 'step': 5022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:30.373737', 'step': 5022, 'epoch': 1} {'type': 'loss', 'content': 0.10275430977344513, 'timestamp': '2025-09-10 02:39:30.376159', 'step': 5023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:30.429360', 'step': 5023, 'epoch': 1} {'type': 'loss', 'content': 0.14357763528823853, 'timestamp': '2025-09-10 02:39:30.435572', 'step': 5024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:30.487858', 'step': 5024, 'epoch': 1} {'type': 'loss', 'content': 0.11169086396694183, 'timestamp': '2025-09-10 02:39:30.490211', 'step': 5025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:30.542805', 'step': 5025, 'epoch': 1} {'type': 'loss', 'content': 0.20722486078739166, 'timestamp': '2025-09-10 02:39:30.545315', 'step': 5026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:30.599486', 'step': 5026, 'epoch': 1} {'type': 'loss', 'content': 0.2431480884552002, 'timestamp': '2025-09-10 02:39:30.601881', 'step': 5027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:30.655436', 'step': 5027, 'epoch': 1} {'type': 'loss', 'content': 0.22612667083740234, 'timestamp': '2025-09-10 02:39:30.661906', 'step': 5028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:30.715616', 'step': 5028, 'epoch': 1} {'type': 'loss', 'content': 0.1667885035276413, 'timestamp': '2025-09-10 02:39:30.717991', 'step': 5029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:30.773885', 'step': 5029, 'epoch': 1} {'type': 'loss', 'content': 0.16078390181064606, 'timestamp': '2025-09-10 02:39:30.776454', 'step': 5030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:30.829912', 'step': 5030, 'epoch': 1} {'type': 'loss', 'content': 0.13460995256900787, 'timestamp': '2025-09-10 02:39:30.832454', 'step': 5031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:30.886303', 'step': 5031, 'epoch': 1} {'type': 'loss', 'content': 0.13051892817020416, 'timestamp': '2025-09-10 02:39:30.892683', 'step': 5032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:30.945100', 'step': 5032, 'epoch': 1} {'type': 'loss', 'content': 0.19031059741973877, 'timestamp': '2025-09-10 02:39:30.947519', 'step': 5033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:31.000397', 'step': 5033, 'epoch': 1} {'type': 'loss', 'content': 0.1646888554096222, 'timestamp': '2025-09-10 02:39:31.002830', 'step': 5034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:31.056145', 'step': 5034, 'epoch': 1} {'type': 'loss', 'content': 0.11255992949008942, 'timestamp': '2025-09-10 02:39:31.058541', 'step': 5035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:31.111712', 'step': 5035, 'epoch': 1} {'type': 'loss', 'content': 0.25757673382759094, 'timestamp': '2025-09-10 02:39:31.118147', 'step': 5036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:31.171794', 'step': 5036, 'epoch': 1} {'type': 'loss', 'content': 0.1275862455368042, 'timestamp': '2025-09-10 02:39:31.174006', 'step': 5037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:31.227237', 'step': 5037, 'epoch': 1} {'type': 'loss', 'content': 0.19868922233581543, 'timestamp': '2025-09-10 02:39:31.229492', 'step': 5038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:31.285956', 'step': 5038, 'epoch': 1} {'type': 'loss', 'content': 0.2670375108718872, 'timestamp': '2025-09-10 02:39:31.288718', 'step': 5039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:31.342216', 'step': 5039, 'epoch': 1} {'type': 'loss', 'content': 0.18135400116443634, 'timestamp': '2025-09-10 02:39:31.348929', 'step': 5040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:31.402813', 'step': 5040, 'epoch': 1} {'type': 'loss', 'content': 0.17041075229644775, 'timestamp': '2025-09-10 02:39:31.405475', 'step': 5041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:31.459723', 'step': 5041, 'epoch': 1} {'type': 'loss', 'content': 0.1650443822145462, 'timestamp': '2025-09-10 02:39:31.462103', 'step': 5042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:31.515581', 'step': 5042, 'epoch': 1} {'type': 'loss', 'content': 0.22326235473155975, 'timestamp': '2025-09-10 02:39:31.517941', 'step': 5043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:31.571422', 'step': 5043, 'epoch': 1} {'type': 'loss', 'content': 0.2028842270374298, 'timestamp': '2025-09-10 02:39:31.577881', 'step': 5044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:31.631168', 'step': 5044, 'epoch': 1} {'type': 'loss', 'content': 0.2585780620574951, 'timestamp': '2025-09-10 02:39:31.633772', 'step': 5045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:31.689275', 'step': 5045, 'epoch': 1} {'type': 'loss', 'content': 0.13324806094169617, 'timestamp': '2025-09-10 02:39:31.691636', 'step': 5046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:31.745008', 'step': 5046, 'epoch': 1} {'type': 'loss', 'content': 0.20664192736148834, 'timestamp': '2025-09-10 02:39:31.747457', 'step': 5047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:31.800638', 'step': 5047, 'epoch': 1} {'type': 'loss', 'content': 0.14178021252155304, 'timestamp': '2025-09-10 02:39:31.807060', 'step': 5048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:31.863302', 'step': 5048, 'epoch': 1} {'type': 'loss', 'content': 0.12769730389118195, 'timestamp': '2025-09-10 02:39:31.865677', 'step': 5049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:31.920999', 'step': 5049, 'epoch': 1} {'type': 'loss', 'content': 0.15946847200393677, 'timestamp': '2025-09-10 02:39:31.923327', 'step': 5050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:31.978417', 'step': 5050, 'epoch': 1} {'type': 'loss', 'content': 0.2065538763999939, 'timestamp': '2025-09-10 02:39:31.980709', 'step': 5051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:32.035542', 'step': 5051, 'epoch': 1} {'type': 'loss', 'content': 0.1908588409423828, 'timestamp': '2025-09-10 02:39:32.041916', 'step': 5052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:32.095412', 'step': 5052, 'epoch': 1} {'type': 'loss', 'content': 0.15578849613666534, 'timestamp': '2025-09-10 02:39:32.097839', 'step': 5053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:32.151152', 'step': 5053, 'epoch': 1} {'type': 'loss', 'content': 0.13911396265029907, 'timestamp': '2025-09-10 02:39:32.153519', 'step': 5054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:32.207690', 'step': 5054, 'epoch': 1} {'type': 'loss', 'content': 0.20432710647583008, 'timestamp': '2025-09-10 02:39:32.210097', 'step': 5055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:32.264393', 'step': 5055, 'epoch': 1} {'type': 'loss', 'content': 0.0999501571059227, 'timestamp': '2025-09-10 02:39:32.270741', 'step': 5056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:32.324787', 'step': 5056, 'epoch': 1} {'type': 'loss', 'content': 0.3462534546852112, 'timestamp': '2025-09-10 02:39:32.327177', 'step': 5057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:32.380610', 'step': 5057, 'epoch': 1} {'type': 'loss', 'content': 0.13246753811836243, 'timestamp': '2025-09-10 02:39:32.383027', 'step': 5058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:32.437020', 'step': 5058, 'epoch': 1} {'type': 'loss', 'content': 0.16235776245594025, 'timestamp': '2025-09-10 02:39:32.439509', 'step': 5059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:32.492687', 'step': 5059, 'epoch': 1} {'type': 'loss', 'content': 0.08614888042211533, 'timestamp': '2025-09-10 02:39:32.498981', 'step': 5060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:32.551568', 'step': 5060, 'epoch': 1} {'type': 'loss', 'content': 0.09335820376873016, 'timestamp': '2025-09-10 02:39:32.553857', 'step': 5061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:32.607887', 'step': 5061, 'epoch': 1} {'type': 'loss', 'content': 0.1563263237476349, 'timestamp': '2025-09-10 02:39:32.610395', 'step': 5062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:32.663794', 'step': 5062, 'epoch': 1} {'type': 'loss', 'content': 0.1421590894460678, 'timestamp': '2025-09-10 02:39:32.666068', 'step': 5063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:32.721261', 'step': 5063, 'epoch': 1} {'type': 'loss', 'content': 0.25826355814933777, 'timestamp': '2025-09-10 02:39:32.727557', 'step': 5064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:32.781615', 'step': 5064, 'epoch': 1} {'type': 'loss', 'content': 0.16378866136074066, 'timestamp': '2025-09-10 02:39:32.783594', 'step': 5065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:32.838151', 'step': 5065, 'epoch': 1} {'type': 'loss', 'content': 0.22335869073867798, 'timestamp': '2025-09-10 02:39:32.840408', 'step': 5066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:32.904680', 'step': 5066, 'epoch': 1} {'type': 'loss', 'content': 0.2200583964586258, 'timestamp': '2025-09-10 02:39:32.907016', 'step': 5067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:32.959920', 'step': 5067, 'epoch': 1} {'type': 'loss', 'content': 0.14731083810329437, 'timestamp': '2025-09-10 02:39:32.966255', 'step': 5068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:33.021446', 'step': 5068, 'epoch': 1} {'type': 'loss', 'content': 0.18641269207000732, 'timestamp': '2025-09-10 02:39:33.023917', 'step': 5069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:33.077667', 'step': 5069, 'epoch': 1} {'type': 'loss', 'content': 0.2308701127767563, 'timestamp': '2025-09-10 02:39:33.080014', 'step': 5070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:33.135277', 'step': 5070, 'epoch': 1} {'type': 'loss', 'content': 0.15075814723968506, 'timestamp': '2025-09-10 02:39:33.137585', 'step': 5071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:33.191398', 'step': 5071, 'epoch': 1} {'type': 'loss', 'content': 0.06510277837514877, 'timestamp': '2025-09-10 02:39:33.197533', 'step': 5072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:33.251070', 'step': 5072, 'epoch': 1} {'type': 'loss', 'content': 0.15181347727775574, 'timestamp': '2025-09-10 02:39:33.253598', 'step': 5073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:33.307429', 'step': 5073, 'epoch': 1} {'type': 'loss', 'content': 0.17937727272510529, 'timestamp': '2025-09-10 02:39:33.309869', 'step': 5074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:33.363846', 'step': 5074, 'epoch': 1} {'type': 'loss', 'content': 0.1590709686279297, 'timestamp': '2025-09-10 02:39:33.366287', 'step': 5075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:33.419836', 'step': 5075, 'epoch': 1} {'type': 'loss', 'content': 0.16688334941864014, 'timestamp': '2025-09-10 02:39:33.426133', 'step': 5076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:33.479025', 'step': 5076, 'epoch': 1} {'type': 'loss', 'content': 0.14071419835090637, 'timestamp': '2025-09-10 02:39:33.481386', 'step': 5077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:33.535297', 'step': 5077, 'epoch': 1} {'type': 'loss', 'content': 0.14309976994991302, 'timestamp': '2025-09-10 02:39:33.538289', 'step': 5078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:33.591840', 'step': 5078, 'epoch': 1} {'type': 'loss', 'content': 0.18472689390182495, 'timestamp': '2025-09-10 02:39:33.594167', 'step': 5079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:33.647571', 'step': 5079, 'epoch': 1} {'type': 'loss', 'content': 0.1160442903637886, 'timestamp': '2025-09-10 02:39:33.653932', 'step': 5080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:33.706223', 'step': 5080, 'epoch': 1} {'type': 'loss', 'content': 0.1813865751028061, 'timestamp': '2025-09-10 02:39:33.708508', 'step': 5081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:33.762158', 'step': 5081, 'epoch': 1} {'type': 'loss', 'content': 0.2306240051984787, 'timestamp': '2025-09-10 02:39:33.764517', 'step': 5082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:33.821517', 'step': 5082, 'epoch': 1} {'type': 'loss', 'content': 0.13922476768493652, 'timestamp': '2025-09-10 02:39:33.823924', 'step': 5083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:33.880912', 'step': 5083, 'epoch': 1} {'type': 'loss', 'content': 0.10860919207334518, 'timestamp': '2025-09-10 02:39:33.887729', 'step': 5084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:33.941917', 'step': 5084, 'epoch': 1} {'type': 'loss', 'content': 0.15035521984100342, 'timestamp': '2025-09-10 02:39:33.944135', 'step': 5085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:33.997626', 'step': 5085, 'epoch': 1} {'type': 'loss', 'content': 0.10744845867156982, 'timestamp': '2025-09-10 02:39:33.999941', 'step': 5086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:34.053035', 'step': 5086, 'epoch': 1} {'type': 'loss', 'content': 0.23769697546958923, 'timestamp': '2025-09-10 02:39:34.055485', 'step': 5087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:34.109026', 'step': 5087, 'epoch': 1} {'type': 'loss', 'content': 0.13862042129039764, 'timestamp': '2025-09-10 02:39:34.115454', 'step': 5088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:34.167958', 'step': 5088, 'epoch': 1} {'type': 'loss', 'content': 0.10337623953819275, 'timestamp': '2025-09-10 02:39:34.170519', 'step': 5089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:34.224302', 'step': 5089, 'epoch': 1} {'type': 'loss', 'content': 0.20349270105361938, 'timestamp': '2025-09-10 02:39:34.226598', 'step': 5090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:34.279739', 'step': 5090, 'epoch': 1} {'type': 'loss', 'content': 0.23861007392406464, 'timestamp': '2025-09-10 02:39:34.282070', 'step': 5091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:34.336889', 'step': 5091, 'epoch': 1} {'type': 'loss', 'content': 0.2752513289451599, 'timestamp': '2025-09-10 02:39:34.343480', 'step': 5092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:34.397705', 'step': 5092, 'epoch': 1} {'type': 'loss', 'content': 0.08938590437173843, 'timestamp': '2025-09-10 02:39:34.400011', 'step': 5093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:34.453253', 'step': 5093, 'epoch': 1} {'type': 'loss', 'content': 0.13836534321308136, 'timestamp': '2025-09-10 02:39:34.455586', 'step': 5094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:34.509281', 'step': 5094, 'epoch': 1} {'type': 'loss', 'content': 0.19691690802574158, 'timestamp': '2025-09-10 02:39:34.511593', 'step': 5095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:34.567015', 'step': 5095, 'epoch': 1} {'type': 'loss', 'content': 0.20815029740333557, 'timestamp': '2025-09-10 02:39:34.573726', 'step': 5096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:34.629165', 'step': 5096, 'epoch': 1} {'type': 'loss', 'content': 0.12057273089885712, 'timestamp': '2025-09-10 02:39:34.631487', 'step': 5097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:34.685993', 'step': 5097, 'epoch': 1} {'type': 'loss', 'content': 0.1172444298863411, 'timestamp': '2025-09-10 02:39:34.688374', 'step': 5098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:34.742669', 'step': 5098, 'epoch': 1} {'type': 'loss', 'content': 0.1664060801267624, 'timestamp': '2025-09-10 02:39:34.744945', 'step': 5099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:34.799447', 'step': 5099, 'epoch': 1} {'type': 'loss', 'content': 0.19398406147956848, 'timestamp': '2025-09-10 02:39:34.806054', 'step': 5100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:34.860980', 'step': 5100, 'epoch': 1} {'type': 'loss', 'content': 0.15586192905902863, 'timestamp': '2025-09-10 02:39:34.864119', 'step': 5101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:34.923682', 'step': 5101, 'epoch': 1} {'type': 'loss', 'content': 0.19603699445724487, 'timestamp': '2025-09-10 02:39:34.925987', 'step': 5102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:34.986461', 'step': 5102, 'epoch': 1} {'type': 'loss', 'content': 0.24044263362884521, 'timestamp': '2025-09-10 02:39:34.989054', 'step': 5103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:35.043580', 'step': 5103, 'epoch': 1} {'type': 'loss', 'content': 0.24261926114559174, 'timestamp': '2025-09-10 02:39:35.050264', 'step': 5104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:35.103709', 'step': 5104, 'epoch': 1} {'type': 'loss', 'content': 0.18069122731685638, 'timestamp': '2025-09-10 02:39:35.106043', 'step': 5105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:35.161176', 'step': 5105, 'epoch': 1} {'type': 'loss', 'content': 0.13181036710739136, 'timestamp': '2025-09-10 02:39:35.163619', 'step': 5106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:35.219920', 'step': 5106, 'epoch': 1} {'type': 'loss', 'content': 0.15829914808273315, 'timestamp': '2025-09-10 02:39:35.222319', 'step': 5107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:35.277437', 'step': 5107, 'epoch': 1} {'type': 'loss', 'content': 0.16083306074142456, 'timestamp': '2025-09-10 02:39:35.283829', 'step': 5108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:35.339223', 'step': 5108, 'epoch': 1} {'type': 'loss', 'content': 0.24667227268218994, 'timestamp': '2025-09-10 02:39:35.341449', 'step': 5109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:35.398345', 'step': 5109, 'epoch': 1} {'type': 'loss', 'content': 0.25806763768196106, 'timestamp': '2025-09-10 02:39:35.400872', 'step': 5110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:35.456595', 'step': 5110, 'epoch': 1} {'type': 'loss', 'content': 0.18692150712013245, 'timestamp': '2025-09-10 02:39:35.458801', 'step': 5111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:35.513461', 'step': 5111, 'epoch': 1} {'type': 'loss', 'content': 0.12886837124824524, 'timestamp': '2025-09-10 02:39:35.519644', 'step': 5112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:35.573438', 'step': 5112, 'epoch': 1} {'type': 'loss', 'content': 0.17282171547412872, 'timestamp': '2025-09-10 02:39:35.575693', 'step': 5113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:35.628799', 'step': 5113, 'epoch': 1} {'type': 'loss', 'content': 0.25830820202827454, 'timestamp': '2025-09-10 02:39:35.630775', 'step': 5114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:35.683523', 'step': 5114, 'epoch': 1} {'type': 'loss', 'content': 0.13428349792957306, 'timestamp': '2025-09-10 02:39:35.685709', 'step': 5115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:35.740192', 'step': 5115, 'epoch': 1} {'type': 'loss', 'content': 0.18901430070400238, 'timestamp': '2025-09-10 02:39:35.746581', 'step': 5116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:35.798982', 'step': 5116, 'epoch': 1} {'type': 'loss', 'content': 0.11283610761165619, 'timestamp': '2025-09-10 02:39:35.801321', 'step': 5117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:35.854136', 'step': 5117, 'epoch': 1} {'type': 'loss', 'content': 0.24666233360767365, 'timestamp': '2025-09-10 02:39:35.856207', 'step': 5118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:35.909339', 'step': 5118, 'epoch': 1} {'type': 'loss', 'content': 0.21276772022247314, 'timestamp': '2025-09-10 02:39:35.911698', 'step': 5119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:35.967416', 'step': 5119, 'epoch': 1} {'type': 'loss', 'content': 0.20592990517616272, 'timestamp': '2025-09-10 02:39:35.973845', 'step': 5120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:36.026822', 'step': 5120, 'epoch': 1} {'type': 'loss', 'content': 0.15765580534934998, 'timestamp': '2025-09-10 02:39:36.029049', 'step': 5121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:36.082194', 'step': 5121, 'epoch': 1} {'type': 'loss', 'content': 0.224537655711174, 'timestamp': '2025-09-10 02:39:36.084293', 'step': 5122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:36.137157', 'step': 5122, 'epoch': 1} {'type': 'loss', 'content': 0.12961532175540924, 'timestamp': '2025-09-10 02:39:36.139495', 'step': 5123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:36.191971', 'step': 5123, 'epoch': 1} {'type': 'loss', 'content': 0.12224269658327103, 'timestamp': '2025-09-10 02:39:36.198317', 'step': 5124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:36.250743', 'step': 5124, 'epoch': 1} {'type': 'loss', 'content': 0.16333787143230438, 'timestamp': '2025-09-10 02:39:36.253070', 'step': 5125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:36.306182', 'step': 5125, 'epoch': 1} {'type': 'loss', 'content': 0.1794387400150299, 'timestamp': '2025-09-10 02:39:36.308509', 'step': 5126, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:39:49.568542', 'step': 5126, 'epoch': 1} {'type': 'pplx', 'content': 12852.41705392129, 'timestamp': '2025-09-10 02:39:49.571781', 'step': 5126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:49.626647', 'step': 5126, 'epoch': 1} {'type': 'loss', 'content': 0.10028054565191269, 'timestamp': '2025-09-10 02:39:49.628945', 'step': 5127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:49.683201', 'step': 5127, 'epoch': 1} {'type': 'loss', 'content': 0.26204177737236023, 'timestamp': '2025-09-10 02:39:49.689351', 'step': 5128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:49.743666', 'step': 5128, 'epoch': 1} {'type': 'loss', 'content': 0.11372353881597519, 'timestamp': '2025-09-10 02:39:49.745796', 'step': 5129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:49.799262', 'step': 5129, 'epoch': 1} {'type': 'loss', 'content': 0.14349091053009033, 'timestamp': '2025-09-10 02:39:49.801464', 'step': 5130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:49.855996', 'step': 5130, 'epoch': 1} {'type': 'loss', 'content': 0.135880246758461, 'timestamp': '2025-09-10 02:39:49.858157', 'step': 5131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:49.912526', 'step': 5131, 'epoch': 1} {'type': 'loss', 'content': 0.12232225388288498, 'timestamp': '2025-09-10 02:39:49.918716', 'step': 5132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:49.972238', 'step': 5132, 'epoch': 1} {'type': 'loss', 'content': 0.18722577393054962, 'timestamp': '2025-09-10 02:39:49.974323', 'step': 5133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:50.028420', 'step': 5133, 'epoch': 1} {'type': 'loss', 'content': 0.15621919929981232, 'timestamp': '2025-09-10 02:39:50.030342', 'step': 5134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:50.083978', 'step': 5134, 'epoch': 1} {'type': 'loss', 'content': 0.18291588127613068, 'timestamp': '2025-09-10 02:39:50.086131', 'step': 5135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:50.141431', 'step': 5135, 'epoch': 1} {'type': 'loss', 'content': 0.15051886439323425, 'timestamp': '2025-09-10 02:39:50.147518', 'step': 5136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:50.202636', 'step': 5136, 'epoch': 1} {'type': 'loss', 'content': 0.1199902668595314, 'timestamp': '2025-09-10 02:39:50.204796', 'step': 5137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:50.262969', 'step': 5137, 'epoch': 1} {'type': 'loss', 'content': 0.24176959693431854, 'timestamp': '2025-09-10 02:39:50.265097', 'step': 5138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:50.319716', 'step': 5138, 'epoch': 1} {'type': 'loss', 'content': 0.22419202327728271, 'timestamp': '2025-09-10 02:39:50.321926', 'step': 5139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:50.376182', 'step': 5139, 'epoch': 1} {'type': 'loss', 'content': 0.15680347383022308, 'timestamp': '2025-09-10 02:39:50.382741', 'step': 5140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:50.436024', 'step': 5140, 'epoch': 1} {'type': 'loss', 'content': 0.16484901309013367, 'timestamp': '2025-09-10 02:39:50.438423', 'step': 5141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:50.491994', 'step': 5141, 'epoch': 1} {'type': 'loss', 'content': 0.14003902673721313, 'timestamp': '2025-09-10 02:39:50.494010', 'step': 5142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:50.547448', 'step': 5142, 'epoch': 1} {'type': 'loss', 'content': 0.1427025943994522, 'timestamp': '2025-09-10 02:39:50.549614', 'step': 5143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:50.605620', 'step': 5143, 'epoch': 1} {'type': 'loss', 'content': 0.1471780687570572, 'timestamp': '2025-09-10 02:39:50.611874', 'step': 5144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:50.667263', 'step': 5144, 'epoch': 1} {'type': 'loss', 'content': 0.1334526389837265, 'timestamp': '2025-09-10 02:39:50.669690', 'step': 5145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:50.726260', 'step': 5145, 'epoch': 1} {'type': 'loss', 'content': 0.1328144371509552, 'timestamp': '2025-09-10 02:39:50.728182', 'step': 5146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:50.785457', 'step': 5146, 'epoch': 1} {'type': 'loss', 'content': 0.13567808270454407, 'timestamp': '2025-09-10 02:39:50.787767', 'step': 5147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:50.843915', 'step': 5147, 'epoch': 1} {'type': 'loss', 'content': 0.20093199610710144, 'timestamp': '2025-09-10 02:39:50.850336', 'step': 5148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:50.905079', 'step': 5148, 'epoch': 1} {'type': 'loss', 'content': 0.044017303735017776, 'timestamp': '2025-09-10 02:39:50.907123', 'step': 5149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:50.961033', 'step': 5149, 'epoch': 1} {'type': 'loss', 'content': 0.22760720551013947, 'timestamp': '2025-09-10 02:39:50.963122', 'step': 5150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:51.016926', 'step': 5150, 'epoch': 1} {'type': 'loss', 'content': 0.08344147354364395, 'timestamp': '2025-09-10 02:39:51.019070', 'step': 5151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:51.073820', 'step': 5151, 'epoch': 1} {'type': 'loss', 'content': 0.1398453265428543, 'timestamp': '2025-09-10 02:39:51.080097', 'step': 5152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:51.136426', 'step': 5152, 'epoch': 1} {'type': 'loss', 'content': 0.18542863428592682, 'timestamp': '2025-09-10 02:39:51.138377', 'step': 5153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:51.193320', 'step': 5153, 'epoch': 1} {'type': 'loss', 'content': 0.15699364244937897, 'timestamp': '2025-09-10 02:39:51.195736', 'step': 5154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:51.250267', 'step': 5154, 'epoch': 1} {'type': 'loss', 'content': 0.1832870990037918, 'timestamp': '2025-09-10 02:39:51.252690', 'step': 5155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:51.306881', 'step': 5155, 'epoch': 1} {'type': 'loss', 'content': 0.12303191423416138, 'timestamp': '2025-09-10 02:39:51.313882', 'step': 5156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:51.366561', 'step': 5156, 'epoch': 1} {'type': 'loss', 'content': 0.1584334671497345, 'timestamp': '2025-09-10 02:39:51.368562', 'step': 5157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:51.421900', 'step': 5157, 'epoch': 1} {'type': 'loss', 'content': 0.15032051503658295, 'timestamp': '2025-09-10 02:39:51.423971', 'step': 5158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:51.477835', 'step': 5158, 'epoch': 1} {'type': 'loss', 'content': 0.15636862814426422, 'timestamp': '2025-09-10 02:39:51.480045', 'step': 5159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:51.533426', 'step': 5159, 'epoch': 1} {'type': 'loss', 'content': 0.1808374971151352, 'timestamp': '2025-09-10 02:39:51.539324', 'step': 5160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:51.592802', 'step': 5160, 'epoch': 1} {'type': 'loss', 'content': 0.14678966999053955, 'timestamp': '2025-09-10 02:39:51.594975', 'step': 5161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:51.651407', 'step': 5161, 'epoch': 1} {'type': 'loss', 'content': 0.19228368997573853, 'timestamp': '2025-09-10 02:39:51.653686', 'step': 5162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:51.708674', 'step': 5162, 'epoch': 1} {'type': 'loss', 'content': 0.21523062884807587, 'timestamp': '2025-09-10 02:39:51.710889', 'step': 5163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:51.764858', 'step': 5163, 'epoch': 1} {'type': 'loss', 'content': 0.1934514194726944, 'timestamp': '2025-09-10 02:39:51.771097', 'step': 5164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:51.823798', 'step': 5164, 'epoch': 1} {'type': 'loss', 'content': 0.2059287577867508, 'timestamp': '2025-09-10 02:39:51.826006', 'step': 5165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:51.878449', 'step': 5165, 'epoch': 1} {'type': 'loss', 'content': 0.10486706346273422, 'timestamp': '2025-09-10 02:39:51.880569', 'step': 5166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:51.933935', 'step': 5166, 'epoch': 1} {'type': 'loss', 'content': 0.18780528008937836, 'timestamp': '2025-09-10 02:39:51.935987', 'step': 5167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:51.989400', 'step': 5167, 'epoch': 1} {'type': 'loss', 'content': 0.09850556403398514, 'timestamp': '2025-09-10 02:39:51.995221', 'step': 5168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:52.047375', 'step': 5168, 'epoch': 1} {'type': 'loss', 'content': 0.2470294088125229, 'timestamp': '2025-09-10 02:39:52.049444', 'step': 5169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:52.103631', 'step': 5169, 'epoch': 1} {'type': 'loss', 'content': 0.19422854483127594, 'timestamp': '2025-09-10 02:39:52.105932', 'step': 5170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:52.158182', 'step': 5170, 'epoch': 1} {'type': 'loss', 'content': 0.21168294548988342, 'timestamp': '2025-09-10 02:39:52.160123', 'step': 5171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:52.214553', 'step': 5171, 'epoch': 1} {'type': 'loss', 'content': 0.19092117249965668, 'timestamp': '2025-09-10 02:39:52.220547', 'step': 5172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:52.274865', 'step': 5172, 'epoch': 1} {'type': 'loss', 'content': 0.1327669471502304, 'timestamp': '2025-09-10 02:39:52.277016', 'step': 5173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:52.330643', 'step': 5173, 'epoch': 1} {'type': 'loss', 'content': 0.15957003831863403, 'timestamp': '2025-09-10 02:39:52.332713', 'step': 5174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:52.385697', 'step': 5174, 'epoch': 1} {'type': 'loss', 'content': 0.1719210147857666, 'timestamp': '2025-09-10 02:39:52.387802', 'step': 5175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:52.441309', 'step': 5175, 'epoch': 1} {'type': 'loss', 'content': 0.14308685064315796, 'timestamp': '2025-09-10 02:39:52.447480', 'step': 5176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:52.500808', 'step': 5176, 'epoch': 1} {'type': 'loss', 'content': 0.1601540595293045, 'timestamp': '2025-09-10 02:39:52.502885', 'step': 5177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:52.556168', 'step': 5177, 'epoch': 1} {'type': 'loss', 'content': 0.14745499193668365, 'timestamp': '2025-09-10 02:39:52.558342', 'step': 5178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:52.612653', 'step': 5178, 'epoch': 1} {'type': 'loss', 'content': 0.1457337886095047, 'timestamp': '2025-09-10 02:39:52.614574', 'step': 5179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:52.667511', 'step': 5179, 'epoch': 1} {'type': 'loss', 'content': 0.1987578570842743, 'timestamp': '2025-09-10 02:39:52.673345', 'step': 5180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:52.730971', 'step': 5180, 'epoch': 1} {'type': 'loss', 'content': 0.15804623067378998, 'timestamp': '2025-09-10 02:39:52.733562', 'step': 5181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:52.787391', 'step': 5181, 'epoch': 1} {'type': 'loss', 'content': 0.16987180709838867, 'timestamp': '2025-09-10 02:39:52.789551', 'step': 5182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:52.843832', 'step': 5182, 'epoch': 1} {'type': 'loss', 'content': 0.1573546677827835, 'timestamp': '2025-09-10 02:39:52.845924', 'step': 5183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:52.899624', 'step': 5183, 'epoch': 1} {'type': 'loss', 'content': 0.19041807949543, 'timestamp': '2025-09-10 02:39:52.905705', 'step': 5184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:52.959348', 'step': 5184, 'epoch': 1} {'type': 'loss', 'content': 0.2026597559452057, 'timestamp': '2025-09-10 02:39:52.961296', 'step': 5185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:53.014452', 'step': 5185, 'epoch': 1} {'type': 'loss', 'content': 0.1756909191608429, 'timestamp': '2025-09-10 02:39:53.016486', 'step': 5186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:53.069935', 'step': 5186, 'epoch': 1} {'type': 'loss', 'content': 0.14395548403263092, 'timestamp': '2025-09-10 02:39:53.071938', 'step': 5187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:53.125399', 'step': 5187, 'epoch': 1} {'type': 'loss', 'content': 0.24534651637077332, 'timestamp': '2025-09-10 02:39:53.131390', 'step': 5188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:53.185126', 'step': 5188, 'epoch': 1} {'type': 'loss', 'content': 0.1844819337129593, 'timestamp': '2025-09-10 02:39:53.187039', 'step': 5189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:53.241046', 'step': 5189, 'epoch': 1} {'type': 'loss', 'content': 0.17061302065849304, 'timestamp': '2025-09-10 02:39:53.243196', 'step': 5190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:53.297690', 'step': 5190, 'epoch': 1} {'type': 'loss', 'content': 0.12890103459358215, 'timestamp': '2025-09-10 02:39:53.299860', 'step': 5191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:53.353261', 'step': 5191, 'epoch': 1} {'type': 'loss', 'content': 0.18282462656497955, 'timestamp': '2025-09-10 02:39:53.359445', 'step': 5192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:53.412081', 'step': 5192, 'epoch': 1} {'type': 'loss', 'content': 0.15871562063694, 'timestamp': '2025-09-10 02:39:53.414121', 'step': 5193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:53.467376', 'step': 5193, 'epoch': 1} {'type': 'loss', 'content': 0.1814797818660736, 'timestamp': '2025-09-10 02:39:53.469308', 'step': 5194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:53.522401', 'step': 5194, 'epoch': 1} {'type': 'loss', 'content': 0.12806718051433563, 'timestamp': '2025-09-10 02:39:53.526474', 'step': 5195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:53.580141', 'step': 5195, 'epoch': 1} {'type': 'loss', 'content': 0.12268249690532684, 'timestamp': '2025-09-10 02:39:53.589966', 'step': 5196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:53.642932', 'step': 5196, 'epoch': 1} {'type': 'loss', 'content': 0.249556764960289, 'timestamp': '2025-09-10 02:39:53.644991', 'step': 5197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:53.698493', 'step': 5197, 'epoch': 1} {'type': 'loss', 'content': 0.23324494063854218, 'timestamp': '2025-09-10 02:39:53.701094', 'step': 5198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:53.754953', 'step': 5198, 'epoch': 1} {'type': 'loss', 'content': 0.15772880613803864, 'timestamp': '2025-09-10 02:39:53.758539', 'step': 5199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:53.813717', 'step': 5199, 'epoch': 1} {'type': 'loss', 'content': 0.1987764984369278, 'timestamp': '2025-09-10 02:39:53.819917', 'step': 5200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:53.874377', 'step': 5200, 'epoch': 1} {'type': 'loss', 'content': 0.26344895362854004, 'timestamp': '2025-09-10 02:39:53.876293', 'step': 5201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:53.930321', 'step': 5201, 'epoch': 1} {'type': 'loss', 'content': 0.1991233378648758, 'timestamp': '2025-09-10 02:39:53.933588', 'step': 5202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:53.997849', 'step': 5202, 'epoch': 1} {'type': 'loss', 'content': 0.12769177556037903, 'timestamp': '2025-09-10 02:39:54.000003', 'step': 5203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:54.053065', 'step': 5203, 'epoch': 1} {'type': 'loss', 'content': 0.11571474373340607, 'timestamp': '2025-09-10 02:39:54.059189', 'step': 5204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:54.111605', 'step': 5204, 'epoch': 1} {'type': 'loss', 'content': 0.10864438861608505, 'timestamp': '2025-09-10 02:39:54.113665', 'step': 5205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:54.166727', 'step': 5205, 'epoch': 1} {'type': 'loss', 'content': 0.14938116073608398, 'timestamp': '2025-09-10 02:39:54.168752', 'step': 5206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:54.221437', 'step': 5206, 'epoch': 1} {'type': 'loss', 'content': 0.14544600248336792, 'timestamp': '2025-09-10 02:39:54.224199', 'step': 5207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:54.279285', 'step': 5207, 'epoch': 1} {'type': 'loss', 'content': 0.2073569893836975, 'timestamp': '2025-09-10 02:39:54.285487', 'step': 5208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:54.340627', 'step': 5208, 'epoch': 1} {'type': 'loss', 'content': 0.22358201444149017, 'timestamp': '2025-09-10 02:39:54.342694', 'step': 5209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:54.396766', 'step': 5209, 'epoch': 1} {'type': 'loss', 'content': 0.17207302153110504, 'timestamp': '2025-09-10 02:39:54.400499', 'step': 5210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:54.454225', 'step': 5210, 'epoch': 1} {'type': 'loss', 'content': 0.14704515039920807, 'timestamp': '2025-09-10 02:39:54.456372', 'step': 5211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:54.509831', 'step': 5211, 'epoch': 1} {'type': 'loss', 'content': 0.18516333401203156, 'timestamp': '2025-09-10 02:39:54.516076', 'step': 5212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:54.568905', 'step': 5212, 'epoch': 1} {'type': 'loss', 'content': 0.16473671793937683, 'timestamp': '2025-09-10 02:39:54.570931', 'step': 5213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:54.624253', 'step': 5213, 'epoch': 1} {'type': 'loss', 'content': 0.21732603013515472, 'timestamp': '2025-09-10 02:39:54.626346', 'step': 5214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:54.687605', 'step': 5214, 'epoch': 1} {'type': 'loss', 'content': 0.14449267089366913, 'timestamp': '2025-09-10 02:39:54.689783', 'step': 5215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:54.742676', 'step': 5215, 'epoch': 1} {'type': 'loss', 'content': 0.15451949834823608, 'timestamp': '2025-09-10 02:39:54.748657', 'step': 5216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:54.802298', 'step': 5216, 'epoch': 1} {'type': 'loss', 'content': 0.1701677441596985, 'timestamp': '2025-09-10 02:39:54.804624', 'step': 5217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:54.860296', 'step': 5217, 'epoch': 1} {'type': 'loss', 'content': 0.1782061606645584, 'timestamp': '2025-09-10 02:39:54.862360', 'step': 5218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:54.916848', 'step': 5218, 'epoch': 1} {'type': 'loss', 'content': 0.12603254616260529, 'timestamp': '2025-09-10 02:39:54.919021', 'step': 5219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:54.978122', 'step': 5219, 'epoch': 1} {'type': 'loss', 'content': 0.17681439220905304, 'timestamp': '2025-09-10 02:39:54.984457', 'step': 5220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:55.037401', 'step': 5220, 'epoch': 1} {'type': 'loss', 'content': 0.22696715593338013, 'timestamp': '2025-09-10 02:39:55.039355', 'step': 5221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:55.092121', 'step': 5221, 'epoch': 1} {'type': 'loss', 'content': 0.26577821373939514, 'timestamp': '2025-09-10 02:39:55.094361', 'step': 5222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:55.148149', 'step': 5222, 'epoch': 1} {'type': 'loss', 'content': 0.09196535497903824, 'timestamp': '2025-09-10 02:39:55.150230', 'step': 5223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:55.203104', 'step': 5223, 'epoch': 1} {'type': 'loss', 'content': 0.1651231199502945, 'timestamp': '2025-09-10 02:39:55.209145', 'step': 5224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:55.261969', 'step': 5224, 'epoch': 1} {'type': 'loss', 'content': 0.12793105840682983, 'timestamp': '2025-09-10 02:39:55.264322', 'step': 5225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:55.317875', 'step': 5225, 'epoch': 1} {'type': 'loss', 'content': 0.11157878488302231, 'timestamp': '2025-09-10 02:39:55.320314', 'step': 5226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:55.373795', 'step': 5226, 'epoch': 1} {'type': 'loss', 'content': 0.17762407660484314, 'timestamp': '2025-09-10 02:39:55.375956', 'step': 5227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:55.430661', 'step': 5227, 'epoch': 1} {'type': 'loss', 'content': 0.09456923604011536, 'timestamp': '2025-09-10 02:39:55.437080', 'step': 5228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:55.490686', 'step': 5228, 'epoch': 1} {'type': 'loss', 'content': 0.19288361072540283, 'timestamp': '2025-09-10 02:39:55.492892', 'step': 5229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:55.547303', 'step': 5229, 'epoch': 1} {'type': 'loss', 'content': 0.1796324998140335, 'timestamp': '2025-09-10 02:39:55.549491', 'step': 5230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:55.603919', 'step': 5230, 'epoch': 1} {'type': 'loss', 'content': 0.09284213185310364, 'timestamp': '2025-09-10 02:39:55.606101', 'step': 5231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:55.661105', 'step': 5231, 'epoch': 1} {'type': 'loss', 'content': 0.21292831003665924, 'timestamp': '2025-09-10 02:39:55.667591', 'step': 5232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:55.721783', 'step': 5232, 'epoch': 1} {'type': 'loss', 'content': 0.15442869067192078, 'timestamp': '2025-09-10 02:39:55.724060', 'step': 5233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:55.778553', 'step': 5233, 'epoch': 1} {'type': 'loss', 'content': 0.26528018712997437, 'timestamp': '2025-09-10 02:39:55.780774', 'step': 5234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:55.835664', 'step': 5234, 'epoch': 1} {'type': 'loss', 'content': 0.06544546037912369, 'timestamp': '2025-09-10 02:39:55.837697', 'step': 5235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:55.892398', 'step': 5235, 'epoch': 1} {'type': 'loss', 'content': 0.13131466507911682, 'timestamp': '2025-09-10 02:39:55.898432', 'step': 5236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:55.953024', 'step': 5236, 'epoch': 1} {'type': 'loss', 'content': 0.11618483066558838, 'timestamp': '2025-09-10 02:39:55.955229', 'step': 5237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:56.011290', 'step': 5237, 'epoch': 1} {'type': 'loss', 'content': 0.23502697050571442, 'timestamp': '2025-09-10 02:39:56.013469', 'step': 5238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:56.068187', 'step': 5238, 'epoch': 1} {'type': 'loss', 'content': 0.12636858224868774, 'timestamp': '2025-09-10 02:39:56.070409', 'step': 5239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:56.126656', 'step': 5239, 'epoch': 1} {'type': 'loss', 'content': 0.21486184000968933, 'timestamp': '2025-09-10 02:39:56.132678', 'step': 5240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:56.188415', 'step': 5240, 'epoch': 1} {'type': 'loss', 'content': 0.14463235437870026, 'timestamp': '2025-09-10 02:39:56.190571', 'step': 5241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:56.246450', 'step': 5241, 'epoch': 1} {'type': 'loss', 'content': 0.1481795758008957, 'timestamp': '2025-09-10 02:39:56.248815', 'step': 5242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:56.303135', 'step': 5242, 'epoch': 1} {'type': 'loss', 'content': 0.13140369951725006, 'timestamp': '2025-09-10 02:39:56.305423', 'step': 5243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:56.359074', 'step': 5243, 'epoch': 1} {'type': 'loss', 'content': 0.15139397978782654, 'timestamp': '2025-09-10 02:39:56.365397', 'step': 5244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:56.419725', 'step': 5244, 'epoch': 1} {'type': 'loss', 'content': 0.07466694712638855, 'timestamp': '2025-09-10 02:39:56.421792', 'step': 5245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:56.476331', 'step': 5245, 'epoch': 1} {'type': 'loss', 'content': 0.145277202129364, 'timestamp': '2025-09-10 02:39:56.478609', 'step': 5246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:56.533496', 'step': 5246, 'epoch': 1} {'type': 'loss', 'content': 0.17614145576953888, 'timestamp': '2025-09-10 02:39:56.535706', 'step': 5247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:56.590189', 'step': 5247, 'epoch': 1} {'type': 'loss', 'content': 0.12966355681419373, 'timestamp': '2025-09-10 02:39:56.596570', 'step': 5248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:56.650745', 'step': 5248, 'epoch': 1} {'type': 'loss', 'content': 0.16278669238090515, 'timestamp': '2025-09-10 02:39:56.652946', 'step': 5249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:56.706754', 'step': 5249, 'epoch': 1} {'type': 'loss', 'content': 0.17993508279323578, 'timestamp': '2025-09-10 02:39:56.708943', 'step': 5250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:56.763216', 'step': 5250, 'epoch': 1} {'type': 'loss', 'content': 0.10107177495956421, 'timestamp': '2025-09-10 02:39:56.765368', 'step': 5251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:56.820195', 'step': 5251, 'epoch': 1} {'type': 'loss', 'content': 0.1532370001077652, 'timestamp': '2025-09-10 02:39:56.826557', 'step': 5252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:56.880949', 'step': 5252, 'epoch': 1} {'type': 'loss', 'content': 0.2431374490261078, 'timestamp': '2025-09-10 02:39:56.883015', 'step': 5253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:56.941374', 'step': 5253, 'epoch': 1} {'type': 'loss', 'content': 0.1882101595401764, 'timestamp': '2025-09-10 02:39:56.943434', 'step': 5254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:56.997617', 'step': 5254, 'epoch': 1} {'type': 'loss', 'content': 0.1715882569551468, 'timestamp': '2025-09-10 02:39:56.999985', 'step': 5255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:57.055078', 'step': 5255, 'epoch': 1} {'type': 'loss', 'content': 0.16886509954929352, 'timestamp': '2025-09-10 02:39:57.061481', 'step': 5256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:57.115550', 'step': 5256, 'epoch': 1} {'type': 'loss', 'content': 0.22224347293376923, 'timestamp': '2025-09-10 02:39:57.117579', 'step': 5257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:57.171712', 'step': 5257, 'epoch': 1} {'type': 'loss', 'content': 0.11136933416128159, 'timestamp': '2025-09-10 02:39:57.173978', 'step': 5258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:57.228685', 'step': 5258, 'epoch': 1} {'type': 'loss', 'content': 0.06428686529397964, 'timestamp': '2025-09-10 02:39:57.230815', 'step': 5259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:57.285498', 'step': 5259, 'epoch': 1} {'type': 'loss', 'content': 0.12920360267162323, 'timestamp': '2025-09-10 02:39:57.291703', 'step': 5260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:57.345063', 'step': 5260, 'epoch': 1} {'type': 'loss', 'content': 0.11692213267087936, 'timestamp': '2025-09-10 02:39:57.347131', 'step': 5261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:57.401551', 'step': 5261, 'epoch': 1} {'type': 'loss', 'content': 0.10463344305753708, 'timestamp': '2025-09-10 02:39:57.403901', 'step': 5262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:57.458521', 'step': 5262, 'epoch': 1} {'type': 'loss', 'content': 0.07380114495754242, 'timestamp': '2025-09-10 02:39:57.460467', 'step': 5263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:57.514310', 'step': 5263, 'epoch': 1} {'type': 'loss', 'content': 0.153590589761734, 'timestamp': '2025-09-10 02:39:57.520512', 'step': 5264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:57.575798', 'step': 5264, 'epoch': 1} {'type': 'loss', 'content': 0.28384315967559814, 'timestamp': '2025-09-10 02:39:57.578172', 'step': 5265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:57.633249', 'step': 5265, 'epoch': 1} {'type': 'loss', 'content': 0.20377029478549957, 'timestamp': '2025-09-10 02:39:57.635299', 'step': 5266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:57.690607', 'step': 5266, 'epoch': 1} {'type': 'loss', 'content': 0.14053450524806976, 'timestamp': '2025-09-10 02:39:57.692915', 'step': 5267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:57.749200', 'step': 5267, 'epoch': 1} {'type': 'loss', 'content': 0.1050117090344429, 'timestamp': '2025-09-10 02:39:57.755438', 'step': 5268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:57.809450', 'step': 5268, 'epoch': 1} {'type': 'loss', 'content': 0.18937574326992035, 'timestamp': '2025-09-10 02:39:57.811733', 'step': 5269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:57.865784', 'step': 5269, 'epoch': 1} {'type': 'loss', 'content': 0.13338081538677216, 'timestamp': '2025-09-10 02:39:57.868166', 'step': 5270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:57.922523', 'step': 5270, 'epoch': 1} {'type': 'loss', 'content': 0.20563679933547974, 'timestamp': '2025-09-10 02:39:57.925247', 'step': 5271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:57.981792', 'step': 5271, 'epoch': 1} {'type': 'loss', 'content': 0.10475366562604904, 'timestamp': '2025-09-10 02:39:57.988169', 'step': 5272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:58.041730', 'step': 5272, 'epoch': 1} {'type': 'loss', 'content': 0.19056445360183716, 'timestamp': '2025-09-10 02:39:58.043821', 'step': 5273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:58.098484', 'step': 5273, 'epoch': 1} {'type': 'loss', 'content': 0.26358747482299805, 'timestamp': '2025-09-10 02:39:58.100725', 'step': 5274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:58.155883', 'step': 5274, 'epoch': 1} {'type': 'loss', 'content': 0.25446757674217224, 'timestamp': '2025-09-10 02:39:58.158060', 'step': 5275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:58.212175', 'step': 5275, 'epoch': 1} {'type': 'loss', 'content': 0.1491924524307251, 'timestamp': '2025-09-10 02:39:58.218356', 'step': 5276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:58.271959', 'step': 5276, 'epoch': 1} {'type': 'loss', 'content': 0.1204826608300209, 'timestamp': '2025-09-10 02:39:58.273927', 'step': 5277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:58.328258', 'step': 5277, 'epoch': 1} {'type': 'loss', 'content': 0.1509382575750351, 'timestamp': '2025-09-10 02:39:58.330321', 'step': 5278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:58.384702', 'step': 5278, 'epoch': 1} {'type': 'loss', 'content': 0.31466278433799744, 'timestamp': '2025-09-10 02:39:58.386751', 'step': 5279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:58.440534', 'step': 5279, 'epoch': 1} {'type': 'loss', 'content': 0.12007454037666321, 'timestamp': '2025-09-10 02:39:58.446898', 'step': 5280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:58.500211', 'step': 5280, 'epoch': 1} {'type': 'loss', 'content': 0.1824594885110855, 'timestamp': '2025-09-10 02:39:58.502151', 'step': 5281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:58.557424', 'step': 5281, 'epoch': 1} {'type': 'loss', 'content': 0.1942969113588333, 'timestamp': '2025-09-10 02:39:58.559660', 'step': 5282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:58.615308', 'step': 5282, 'epoch': 1} {'type': 'loss', 'content': 0.13681623339653015, 'timestamp': '2025-09-10 02:39:58.617580', 'step': 5283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:58.675493', 'step': 5283, 'epoch': 1} {'type': 'loss', 'content': 0.1242980808019638, 'timestamp': '2025-09-10 02:39:58.682277', 'step': 5284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:58.737710', 'step': 5284, 'epoch': 1} {'type': 'loss', 'content': 0.19360923767089844, 'timestamp': '2025-09-10 02:39:58.740004', 'step': 5285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:58.795389', 'step': 5285, 'epoch': 1} {'type': 'loss', 'content': 0.2577936351299286, 'timestamp': '2025-09-10 02:39:58.797733', 'step': 5286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:58.854003', 'step': 5286, 'epoch': 1} {'type': 'loss', 'content': 0.1474509984254837, 'timestamp': '2025-09-10 02:39:58.856270', 'step': 5287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:58.912133', 'step': 5287, 'epoch': 1} {'type': 'loss', 'content': 0.10686856508255005, 'timestamp': '2025-09-10 02:39:58.918588', 'step': 5288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:58.983089', 'step': 5288, 'epoch': 1} {'type': 'loss', 'content': 0.273467481136322, 'timestamp': '2025-09-10 02:39:58.985610', 'step': 5289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:59.040363', 'step': 5289, 'epoch': 1} {'type': 'loss', 'content': 0.11625470221042633, 'timestamp': '2025-09-10 02:39:59.042390', 'step': 5290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:39:59.097664', 'step': 5290, 'epoch': 1} {'type': 'loss', 'content': 0.27195149660110474, 'timestamp': '2025-09-10 02:39:59.099847', 'step': 5291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:59.156672', 'step': 5291, 'epoch': 1} {'type': 'loss', 'content': 0.19573263823986053, 'timestamp': '2025-09-10 02:39:59.163925', 'step': 5292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:59.219899', 'step': 5292, 'epoch': 1} {'type': 'loss', 'content': 0.2308805137872696, 'timestamp': '2025-09-10 02:39:59.222004', 'step': 5293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:59.277628', 'step': 5293, 'epoch': 1} {'type': 'loss', 'content': 0.16744869947433472, 'timestamp': '2025-09-10 02:39:59.279799', 'step': 5294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:59.336814', 'step': 5294, 'epoch': 1} {'type': 'loss', 'content': 0.10836278647184372, 'timestamp': '2025-09-10 02:39:59.338950', 'step': 5295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:59.394376', 'step': 5295, 'epoch': 1} {'type': 'loss', 'content': 0.1361193060874939, 'timestamp': '2025-09-10 02:39:59.400692', 'step': 5296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:59.454967', 'step': 5296, 'epoch': 1} {'type': 'loss', 'content': 0.18523474037647247, 'timestamp': '2025-09-10 02:39:59.457044', 'step': 5297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:59.512309', 'step': 5297, 'epoch': 1} {'type': 'loss', 'content': 0.1722307652235031, 'timestamp': '2025-09-10 02:39:59.514665', 'step': 5298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:59.571950', 'step': 5298, 'epoch': 1} {'type': 'loss', 'content': 0.17038273811340332, 'timestamp': '2025-09-10 02:39:59.574311', 'step': 5299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:39:59.629414', 'step': 5299, 'epoch': 1} {'type': 'loss', 'content': 0.15147149562835693, 'timestamp': '2025-09-10 02:39:59.635810', 'step': 5300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:59.691349', 'step': 5300, 'epoch': 1} {'type': 'loss', 'content': 0.12400905042886734, 'timestamp': '2025-09-10 02:39:59.693550', 'step': 5301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:59.749378', 'step': 5301, 'epoch': 1} {'type': 'loss', 'content': 0.18603463470935822, 'timestamp': '2025-09-10 02:39:59.751684', 'step': 5302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:59.806908', 'step': 5302, 'epoch': 1} {'type': 'loss', 'content': 0.2418169379234314, 'timestamp': '2025-09-10 02:39:59.809123', 'step': 5303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:59.863699', 'step': 5303, 'epoch': 1} {'type': 'loss', 'content': 0.11418221890926361, 'timestamp': '2025-09-10 02:39:59.870050', 'step': 5304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:39:59.925358', 'step': 5304, 'epoch': 1} {'type': 'loss', 'content': 0.10908039659261703, 'timestamp': '2025-09-10 02:39:59.927471', 'step': 5305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:59.982308', 'step': 5305, 'epoch': 1} {'type': 'loss', 'content': 0.1982644498348236, 'timestamp': '2025-09-10 02:39:59.984484', 'step': 5306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:00.039216', 'step': 5306, 'epoch': 1} {'type': 'loss', 'content': 0.1264091581106186, 'timestamp': '2025-09-10 02:40:00.041590', 'step': 5307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:00.095690', 'step': 5307, 'epoch': 1} {'type': 'loss', 'content': 0.1524890661239624, 'timestamp': '2025-09-10 02:40:00.102025', 'step': 5308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:00.155812', 'step': 5308, 'epoch': 1} {'type': 'loss', 'content': 0.1582099199295044, 'timestamp': '2025-09-10 02:40:00.158061', 'step': 5309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:00.213778', 'step': 5309, 'epoch': 1} {'type': 'loss', 'content': 0.1575179398059845, 'timestamp': '2025-09-10 02:40:00.215848', 'step': 5310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:00.272069', 'step': 5310, 'epoch': 1} {'type': 'loss', 'content': 0.09831490367650986, 'timestamp': '2025-09-10 02:40:00.274423', 'step': 5311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:00.329448', 'step': 5311, 'epoch': 1} {'type': 'loss', 'content': 0.15215902030467987, 'timestamp': '2025-09-10 02:40:00.336049', 'step': 5312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:00.392621', 'step': 5312, 'epoch': 1} {'type': 'loss', 'content': 0.2191908210515976, 'timestamp': '2025-09-10 02:40:00.394844', 'step': 5313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:00.451820', 'step': 5313, 'epoch': 1} {'type': 'loss', 'content': 0.2637805938720703, 'timestamp': '2025-09-10 02:40:00.454060', 'step': 5314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:00.510699', 'step': 5314, 'epoch': 1} {'type': 'loss', 'content': 0.11016686260700226, 'timestamp': '2025-09-10 02:40:00.512875', 'step': 5315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:00.569656', 'step': 5315, 'epoch': 1} {'type': 'loss', 'content': 0.19989009201526642, 'timestamp': '2025-09-10 02:40:00.576497', 'step': 5316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:00.631742', 'step': 5316, 'epoch': 1} {'type': 'loss', 'content': 0.09425076097249985, 'timestamp': '2025-09-10 02:40:00.633877', 'step': 5317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:00.690161', 'step': 5317, 'epoch': 1} {'type': 'loss', 'content': 0.14174173772335052, 'timestamp': '2025-09-10 02:40:00.692344', 'step': 5318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:00.749818', 'step': 5318, 'epoch': 1} {'type': 'loss', 'content': 0.16326147317886353, 'timestamp': '2025-09-10 02:40:00.752035', 'step': 5319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:00.810371', 'step': 5319, 'epoch': 1} {'type': 'loss', 'content': 0.16465924680233002, 'timestamp': '2025-09-10 02:40:00.817111', 'step': 5320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:00.874411', 'step': 5320, 'epoch': 1} {'type': 'loss', 'content': 0.13183195888996124, 'timestamp': '2025-09-10 02:40:00.877843', 'step': 5321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:00.935247', 'step': 5321, 'epoch': 1} {'type': 'loss', 'content': 0.10262148827314377, 'timestamp': '2025-09-10 02:40:00.937484', 'step': 5322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:00.996323', 'step': 5322, 'epoch': 1} {'type': 'loss', 'content': 0.19651424884796143, 'timestamp': '2025-09-10 02:40:00.998406', 'step': 5323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:01.056512', 'step': 5323, 'epoch': 1} {'type': 'loss', 'content': 0.25014814734458923, 'timestamp': '2025-09-10 02:40:01.063331', 'step': 5324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:01.119554', 'step': 5324, 'epoch': 1} {'type': 'loss', 'content': 0.17314976453781128, 'timestamp': '2025-09-10 02:40:01.121626', 'step': 5325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:01.177463', 'step': 5325, 'epoch': 1} {'type': 'loss', 'content': 0.12836305797100067, 'timestamp': '2025-09-10 02:40:01.179506', 'step': 5326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:01.233783', 'step': 5326, 'epoch': 1} {'type': 'loss', 'content': 0.13727504014968872, 'timestamp': '2025-09-10 02:40:01.235729', 'step': 5327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:01.291147', 'step': 5327, 'epoch': 1} {'type': 'loss', 'content': 0.12704820930957794, 'timestamp': '2025-09-10 02:40:01.297452', 'step': 5328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:01.351505', 'step': 5328, 'epoch': 1} {'type': 'loss', 'content': 0.22065530717372894, 'timestamp': '2025-09-10 02:40:01.353579', 'step': 5329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:01.407419', 'step': 5329, 'epoch': 1} {'type': 'loss', 'content': 0.1491393744945526, 'timestamp': '2025-09-10 02:40:01.409587', 'step': 5330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:01.463388', 'step': 5330, 'epoch': 1} {'type': 'loss', 'content': 0.20293907821178436, 'timestamp': '2025-09-10 02:40:01.465358', 'step': 5331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:01.518279', 'step': 5331, 'epoch': 1} {'type': 'loss', 'content': 0.18191346526145935, 'timestamp': '2025-09-10 02:40:01.524150', 'step': 5332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:01.577206', 'step': 5332, 'epoch': 1} {'type': 'loss', 'content': 0.18147136270999908, 'timestamp': '2025-09-10 02:40:01.579119', 'step': 5333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:01.632308', 'step': 5333, 'epoch': 1} {'type': 'loss', 'content': 0.10387525707483292, 'timestamp': '2025-09-10 02:40:01.634702', 'step': 5334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:01.688401', 'step': 5334, 'epoch': 1} {'type': 'loss', 'content': 0.23186375200748444, 'timestamp': '2025-09-10 02:40:01.690445', 'step': 5335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:01.746012', 'step': 5335, 'epoch': 1} {'type': 'loss', 'content': 0.18448464572429657, 'timestamp': '2025-09-10 02:40:01.752338', 'step': 5336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:01.805839', 'step': 5336, 'epoch': 1} {'type': 'loss', 'content': 0.18673110008239746, 'timestamp': '2025-09-10 02:40:01.808115', 'step': 5337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:01.862893', 'step': 5337, 'epoch': 1} {'type': 'loss', 'content': 0.22363032400608063, 'timestamp': '2025-09-10 02:40:01.864956', 'step': 5338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:01.919342', 'step': 5338, 'epoch': 1} {'type': 'loss', 'content': 0.16869309544563293, 'timestamp': '2025-09-10 02:40:01.921327', 'step': 5339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:01.975199', 'step': 5339, 'epoch': 1} {'type': 'loss', 'content': 0.15148930251598358, 'timestamp': '2025-09-10 02:40:01.981619', 'step': 5340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:02.034714', 'step': 5340, 'epoch': 1} {'type': 'loss', 'content': 0.07939156889915466, 'timestamp': '2025-09-10 02:40:02.036653', 'step': 5341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:02.090167', 'step': 5341, 'epoch': 1} {'type': 'loss', 'content': 0.058368388563394547, 'timestamp': '2025-09-10 02:40:02.092264', 'step': 5342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:02.145262', 'step': 5342, 'epoch': 1} {'type': 'loss', 'content': 0.1927088499069214, 'timestamp': '2025-09-10 02:40:02.147481', 'step': 5343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:02.201898', 'step': 5343, 'epoch': 1} {'type': 'loss', 'content': 0.16857969760894775, 'timestamp': '2025-09-10 02:40:02.207889', 'step': 5344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:02.260132', 'step': 5344, 'epoch': 1} {'type': 'loss', 'content': 0.21726515889167786, 'timestamp': '2025-09-10 02:40:02.262062', 'step': 5345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:02.314257', 'step': 5345, 'epoch': 1} {'type': 'loss', 'content': 0.22889132797718048, 'timestamp': '2025-09-10 02:40:02.316192', 'step': 5346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:02.368540', 'step': 5346, 'epoch': 1} {'type': 'loss', 'content': 0.19707173109054565, 'timestamp': '2025-09-10 02:40:02.370585', 'step': 5347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:02.423405', 'step': 5347, 'epoch': 1} {'type': 'loss', 'content': 0.22344638407230377, 'timestamp': '2025-09-10 02:40:02.429410', 'step': 5348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:02.481857', 'step': 5348, 'epoch': 1} {'type': 'loss', 'content': 0.1615571677684784, 'timestamp': '2025-09-10 02:40:02.483992', 'step': 5349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:02.536679', 'step': 5349, 'epoch': 1} {'type': 'loss', 'content': 0.21576030552387238, 'timestamp': '2025-09-10 02:40:02.538834', 'step': 5350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:02.591861', 'step': 5350, 'epoch': 1} {'type': 'loss', 'content': 0.2104615569114685, 'timestamp': '2025-09-10 02:40:02.593931', 'step': 5351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:02.647629', 'step': 5351, 'epoch': 1} {'type': 'loss', 'content': 0.2144673615694046, 'timestamp': '2025-09-10 02:40:02.653570', 'step': 5352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:02.707358', 'step': 5352, 'epoch': 1} {'type': 'loss', 'content': 0.12758149206638336, 'timestamp': '2025-09-10 02:40:02.709560', 'step': 5353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:02.762682', 'step': 5353, 'epoch': 1} {'type': 'loss', 'content': 0.13706758618354797, 'timestamp': '2025-09-10 02:40:02.764760', 'step': 5354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:02.817776', 'step': 5354, 'epoch': 1} {'type': 'loss', 'content': 0.2206781506538391, 'timestamp': '2025-09-10 02:40:02.823185', 'step': 5355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:02.876689', 'step': 5355, 'epoch': 1} {'type': 'loss', 'content': 0.20030203461647034, 'timestamp': '2025-09-10 02:40:02.882610', 'step': 5356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:02.935355', 'step': 5356, 'epoch': 1} {'type': 'loss', 'content': 0.15977036952972412, 'timestamp': '2025-09-10 02:40:02.937669', 'step': 5357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:02.991193', 'step': 5357, 'epoch': 1} {'type': 'loss', 'content': 0.2114206850528717, 'timestamp': '2025-09-10 02:40:02.993612', 'step': 5358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:03.047000', 'step': 5358, 'epoch': 1} {'type': 'loss', 'content': 0.11856312304735184, 'timestamp': '2025-09-10 02:40:03.049683', 'step': 5359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:03.104008', 'step': 5359, 'epoch': 1} {'type': 'loss', 'content': 0.1662203073501587, 'timestamp': '2025-09-10 02:40:03.109905', 'step': 5360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:03.162761', 'step': 5360, 'epoch': 1} {'type': 'loss', 'content': 0.12587304413318634, 'timestamp': '2025-09-10 02:40:03.165117', 'step': 5361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:03.218328', 'step': 5361, 'epoch': 1} {'type': 'loss', 'content': 0.13836972415447235, 'timestamp': '2025-09-10 02:40:03.220505', 'step': 5362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:03.281961', 'step': 5362, 'epoch': 1} {'type': 'loss', 'content': 0.15055446326732635, 'timestamp': '2025-09-10 02:40:03.284182', 'step': 5363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:03.336968', 'step': 5363, 'epoch': 1} {'type': 'loss', 'content': 0.12204638868570328, 'timestamp': '2025-09-10 02:40:03.342779', 'step': 5364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:03.400809', 'step': 5364, 'epoch': 1} {'type': 'loss', 'content': 0.1310841590166092, 'timestamp': '2025-09-10 02:40:03.402894', 'step': 5365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:03.459535', 'step': 5365, 'epoch': 1} {'type': 'loss', 'content': 0.14654044806957245, 'timestamp': '2025-09-10 02:40:03.461667', 'step': 5366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:03.515140', 'step': 5366, 'epoch': 1} {'type': 'loss', 'content': 0.12441913783550262, 'timestamp': '2025-09-10 02:40:03.517205', 'step': 5367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:03.570420', 'step': 5367, 'epoch': 1} {'type': 'loss', 'content': 0.14237993955612183, 'timestamp': '2025-09-10 02:40:03.576433', 'step': 5368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:03.629797', 'step': 5368, 'epoch': 1} {'type': 'loss', 'content': 0.19041335582733154, 'timestamp': '2025-09-10 02:40:03.631943', 'step': 5369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:03.686288', 'step': 5369, 'epoch': 1} {'type': 'loss', 'content': 0.13092656433582306, 'timestamp': '2025-09-10 02:40:03.688596', 'step': 5370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:03.742181', 'step': 5370, 'epoch': 1} {'type': 'loss', 'content': 0.15347632765769958, 'timestamp': '2025-09-10 02:40:03.744504', 'step': 5371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:03.798419', 'step': 5371, 'epoch': 1} {'type': 'loss', 'content': 0.1734500378370285, 'timestamp': '2025-09-10 02:40:03.804422', 'step': 5372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:03.856975', 'step': 5372, 'epoch': 1} {'type': 'loss', 'content': 0.15875457227230072, 'timestamp': '2025-09-10 02:40:03.859107', 'step': 5373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:03.912015', 'step': 5373, 'epoch': 1} {'type': 'loss', 'content': 0.13647495210170746, 'timestamp': '2025-09-10 02:40:03.914350', 'step': 5374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:03.973033', 'step': 5374, 'epoch': 1} {'type': 'loss', 'content': 0.08443786948919296, 'timestamp': '2025-09-10 02:40:03.975283', 'step': 5375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:04.029537', 'step': 5375, 'epoch': 1} {'type': 'loss', 'content': 0.14038677513599396, 'timestamp': '2025-09-10 02:40:04.035380', 'step': 5376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:04.087765', 'step': 5376, 'epoch': 1} {'type': 'loss', 'content': 0.13672852516174316, 'timestamp': '2025-09-10 02:40:04.089923', 'step': 5377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:04.142937', 'step': 5377, 'epoch': 1} {'type': 'loss', 'content': 0.15161220729351044, 'timestamp': '2025-09-10 02:40:04.144893', 'step': 5378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:04.198624', 'step': 5378, 'epoch': 1} {'type': 'loss', 'content': 0.23076114058494568, 'timestamp': '2025-09-10 02:40:04.200830', 'step': 5379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:04.253722', 'step': 5379, 'epoch': 1} {'type': 'loss', 'content': 0.1936608999967575, 'timestamp': '2025-09-10 02:40:04.259510', 'step': 5380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:04.313412', 'step': 5380, 'epoch': 1} {'type': 'loss', 'content': 0.12032121419906616, 'timestamp': '2025-09-10 02:40:04.315541', 'step': 5381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:04.368705', 'step': 5381, 'epoch': 1} {'type': 'loss', 'content': 0.12747271358966827, 'timestamp': '2025-09-10 02:40:04.370865', 'step': 5382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:04.424868', 'step': 5382, 'epoch': 1} {'type': 'loss', 'content': 0.11740513890981674, 'timestamp': '2025-09-10 02:40:04.426815', 'step': 5383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:04.480471', 'step': 5383, 'epoch': 1} {'type': 'loss', 'content': 0.16306160390377045, 'timestamp': '2025-09-10 02:40:04.486352', 'step': 5384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:04.539816', 'step': 5384, 'epoch': 1} {'type': 'loss', 'content': 0.12766021490097046, 'timestamp': '2025-09-10 02:40:04.542094', 'step': 5385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:04.596602', 'step': 5385, 'epoch': 1} {'type': 'loss', 'content': 0.1241668090224266, 'timestamp': '2025-09-10 02:40:04.598846', 'step': 5386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:04.652506', 'step': 5386, 'epoch': 1} {'type': 'loss', 'content': 0.13902321457862854, 'timestamp': '2025-09-10 02:40:04.654622', 'step': 5387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:04.707667', 'step': 5387, 'epoch': 1} {'type': 'loss', 'content': 0.17575284838676453, 'timestamp': '2025-09-10 02:40:04.713618', 'step': 5388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:04.766021', 'step': 5388, 'epoch': 1} {'type': 'loss', 'content': 0.12933026254177094, 'timestamp': '2025-09-10 02:40:04.768146', 'step': 5389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:04.821633', 'step': 5389, 'epoch': 1} {'type': 'loss', 'content': 0.21295547485351562, 'timestamp': '2025-09-10 02:40:04.823845', 'step': 5390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:04.876820', 'step': 5390, 'epoch': 1} {'type': 'loss', 'content': 0.1955259144306183, 'timestamp': '2025-09-10 02:40:04.878746', 'step': 5391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:04.931216', 'step': 5391, 'epoch': 1} {'type': 'loss', 'content': 0.23541845381259918, 'timestamp': '2025-09-10 02:40:04.937024', 'step': 5392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:04.991424', 'step': 5392, 'epoch': 1} {'type': 'loss', 'content': 0.08799070864915848, 'timestamp': '2025-09-10 02:40:04.993611', 'step': 5393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:05.046789', 'step': 5393, 'epoch': 1} {'type': 'loss', 'content': 0.13767032325267792, 'timestamp': '2025-09-10 02:40:05.048967', 'step': 5394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:05.103519', 'step': 5394, 'epoch': 1} {'type': 'loss', 'content': 0.23693428933620453, 'timestamp': '2025-09-10 02:40:05.105537', 'step': 5395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:05.158855', 'step': 5395, 'epoch': 1} {'type': 'loss', 'content': 0.21495820581912994, 'timestamp': '2025-09-10 02:40:05.164730', 'step': 5396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:05.217676', 'step': 5396, 'epoch': 1} {'type': 'loss', 'content': 0.12958785891532898, 'timestamp': '2025-09-10 02:40:05.220091', 'step': 5397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:05.272809', 'step': 5397, 'epoch': 1} {'type': 'loss', 'content': 0.19226638972759247, 'timestamp': '2025-09-10 02:40:05.274919', 'step': 5398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:05.335910', 'step': 5398, 'epoch': 1} {'type': 'loss', 'content': 0.22319161891937256, 'timestamp': '2025-09-10 02:40:05.338004', 'step': 5399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:05.394227', 'step': 5399, 'epoch': 1} {'type': 'loss', 'content': 0.18246838450431824, 'timestamp': '2025-09-10 02:40:05.400094', 'step': 5400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:05.452810', 'step': 5400, 'epoch': 1} {'type': 'loss', 'content': 0.1186150535941124, 'timestamp': '2025-09-10 02:40:05.454930', 'step': 5401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:05.507965', 'step': 5401, 'epoch': 1} {'type': 'loss', 'content': 0.14882919192314148, 'timestamp': '2025-09-10 02:40:05.509918', 'step': 5402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:05.562938', 'step': 5402, 'epoch': 1} {'type': 'loss', 'content': 0.15111137926578522, 'timestamp': '2025-09-10 02:40:05.564862', 'step': 5403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:05.618055', 'step': 5403, 'epoch': 1} {'type': 'loss', 'content': 0.13351573050022125, 'timestamp': '2025-09-10 02:40:05.623873', 'step': 5404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:05.676674', 'step': 5404, 'epoch': 1} {'type': 'loss', 'content': 0.13161425292491913, 'timestamp': '2025-09-10 02:40:05.678694', 'step': 5405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:05.732112', 'step': 5405, 'epoch': 1} {'type': 'loss', 'content': 0.18605223298072815, 'timestamp': '2025-09-10 02:40:05.734347', 'step': 5406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:05.788017', 'step': 5406, 'epoch': 1} {'type': 'loss', 'content': 0.16281069815158844, 'timestamp': '2025-09-10 02:40:05.790164', 'step': 5407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:05.842806', 'step': 5407, 'epoch': 1} {'type': 'loss', 'content': 0.12512165307998657, 'timestamp': '2025-09-10 02:40:05.848575', 'step': 5408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:05.901555', 'step': 5408, 'epoch': 1} {'type': 'loss', 'content': 0.10107250511646271, 'timestamp': '2025-09-10 02:40:05.903601', 'step': 5409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:05.955998', 'step': 5409, 'epoch': 1} {'type': 'loss', 'content': 0.10962041467428207, 'timestamp': '2025-09-10 02:40:05.958070', 'step': 5410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:06.011601', 'step': 5410, 'epoch': 1} {'type': 'loss', 'content': 0.1569167524576187, 'timestamp': '2025-09-10 02:40:06.013725', 'step': 5411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:06.067432', 'step': 5411, 'epoch': 1} {'type': 'loss', 'content': 0.22930873930454254, 'timestamp': '2025-09-10 02:40:06.073387', 'step': 5412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:06.126829', 'step': 5412, 'epoch': 1} {'type': 'loss', 'content': 0.14786820113658905, 'timestamp': '2025-09-10 02:40:06.128824', 'step': 5413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:06.182066', 'step': 5413, 'epoch': 1} {'type': 'loss', 'content': 0.12241161614656448, 'timestamp': '2025-09-10 02:40:06.184392', 'step': 5414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:06.238106', 'step': 5414, 'epoch': 1} {'type': 'loss', 'content': 0.20527274906635284, 'timestamp': '2025-09-10 02:40:06.240532', 'step': 5415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:06.294430', 'step': 5415, 'epoch': 1} {'type': 'loss', 'content': 0.24317167699337006, 'timestamp': '2025-09-10 02:40:06.300016', 'step': 5416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:06.353427', 'step': 5416, 'epoch': 1} {'type': 'loss', 'content': 0.1645267754793167, 'timestamp': '2025-09-10 02:40:06.355629', 'step': 5417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:06.408257', 'step': 5417, 'epoch': 1} {'type': 'loss', 'content': 0.20430317521095276, 'timestamp': '2025-09-10 02:40:06.410880', 'step': 5418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:06.465415', 'step': 5418, 'epoch': 1} {'type': 'loss', 'content': 0.11952359229326248, 'timestamp': '2025-09-10 02:40:06.467549', 'step': 5419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:06.522562', 'step': 5419, 'epoch': 1} {'type': 'loss', 'content': 0.12733842432498932, 'timestamp': '2025-09-10 02:40:06.528906', 'step': 5420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:06.582736', 'step': 5420, 'epoch': 1} {'type': 'loss', 'content': 0.12416474521160126, 'timestamp': '2025-09-10 02:40:06.584925', 'step': 5421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:06.639071', 'step': 5421, 'epoch': 1} {'type': 'loss', 'content': 0.2251349687576294, 'timestamp': '2025-09-10 02:40:06.641295', 'step': 5422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:06.695812', 'step': 5422, 'epoch': 1} {'type': 'loss', 'content': 0.12951934337615967, 'timestamp': '2025-09-10 02:40:06.697967', 'step': 5423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:06.750883', 'step': 5423, 'epoch': 1} {'type': 'loss', 'content': 0.19323086738586426, 'timestamp': '2025-09-10 02:40:06.756918', 'step': 5424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:06.810000', 'step': 5424, 'epoch': 1} {'type': 'loss', 'content': 0.16031117737293243, 'timestamp': '2025-09-10 02:40:06.811972', 'step': 5425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:06.864784', 'step': 5425, 'epoch': 1} {'type': 'loss', 'content': 0.10589082539081573, 'timestamp': '2025-09-10 02:40:06.866767', 'step': 5426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:06.919899', 'step': 5426, 'epoch': 1} {'type': 'loss', 'content': 0.10150544345378876, 'timestamp': '2025-09-10 02:40:06.921930', 'step': 5427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:06.974836', 'step': 5427, 'epoch': 1} {'type': 'loss', 'content': 0.2470272183418274, 'timestamp': '2025-09-10 02:40:06.980648', 'step': 5428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:07.032837', 'step': 5428, 'epoch': 1} {'type': 'loss', 'content': 0.11495807021856308, 'timestamp': '2025-09-10 02:40:07.035075', 'step': 5429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:07.088948', 'step': 5429, 'epoch': 1} {'type': 'loss', 'content': 0.11646527051925659, 'timestamp': '2025-09-10 02:40:07.091093', 'step': 5430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:07.146334', 'step': 5430, 'epoch': 1} {'type': 'loss', 'content': 0.18630944192409515, 'timestamp': '2025-09-10 02:40:07.148309', 'step': 5431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:07.203472', 'step': 5431, 'epoch': 1} {'type': 'loss', 'content': 0.11454053223133087, 'timestamp': '2025-09-10 02:40:07.209508', 'step': 5432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:07.263183', 'step': 5432, 'epoch': 1} {'type': 'loss', 'content': 0.14377617835998535, 'timestamp': '2025-09-10 02:40:07.265245', 'step': 5433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:07.320249', 'step': 5433, 'epoch': 1} {'type': 'loss', 'content': 0.19855089485645294, 'timestamp': '2025-09-10 02:40:07.322163', 'step': 5434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:07.376949', 'step': 5434, 'epoch': 1} {'type': 'loss', 'content': 0.2816992998123169, 'timestamp': '2025-09-10 02:40:07.378894', 'step': 5435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:07.434216', 'step': 5435, 'epoch': 1} {'type': 'loss', 'content': 0.1458839774131775, 'timestamp': '2025-09-10 02:40:07.440278', 'step': 5436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:07.494699', 'step': 5436, 'epoch': 1} {'type': 'loss', 'content': 0.16600783169269562, 'timestamp': '2025-09-10 02:40:07.496711', 'step': 5437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:07.551230', 'step': 5437, 'epoch': 1} {'type': 'loss', 'content': 0.16075915098190308, 'timestamp': '2025-09-10 02:40:07.553247', 'step': 5438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:07.607004', 'step': 5438, 'epoch': 1} {'type': 'loss', 'content': 0.14178147912025452, 'timestamp': '2025-09-10 02:40:07.609029', 'step': 5439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:07.663864', 'step': 5439, 'epoch': 1} {'type': 'loss', 'content': 0.13018746674060822, 'timestamp': '2025-09-10 02:40:07.669769', 'step': 5440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:07.725837', 'step': 5440, 'epoch': 1} {'type': 'loss', 'content': 0.2242339551448822, 'timestamp': '2025-09-10 02:40:07.727848', 'step': 5441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:07.784310', 'step': 5441, 'epoch': 1} {'type': 'loss', 'content': 0.18299391865730286, 'timestamp': '2025-09-10 02:40:07.786338', 'step': 5442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:07.841659', 'step': 5442, 'epoch': 1} {'type': 'loss', 'content': 0.15627244114875793, 'timestamp': '2025-09-10 02:40:07.843814', 'step': 5443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:07.898618', 'step': 5443, 'epoch': 1} {'type': 'loss', 'content': 0.13216476142406464, 'timestamp': '2025-09-10 02:40:07.904800', 'step': 5444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:07.959287', 'step': 5444, 'epoch': 1} {'type': 'loss', 'content': 0.14755630493164062, 'timestamp': '2025-09-10 02:40:07.961255', 'step': 5445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:08.016791', 'step': 5445, 'epoch': 1} {'type': 'loss', 'content': 0.1325174868106842, 'timestamp': '2025-09-10 02:40:08.018768', 'step': 5446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:08.072644', 'step': 5446, 'epoch': 1} {'type': 'loss', 'content': 0.15733805298805237, 'timestamp': '2025-09-10 02:40:08.074813', 'step': 5447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:08.128887', 'step': 5447, 'epoch': 1} {'type': 'loss', 'content': 0.1629461944103241, 'timestamp': '2025-09-10 02:40:08.134869', 'step': 5448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:08.187800', 'step': 5448, 'epoch': 1} {'type': 'loss', 'content': 0.1783817708492279, 'timestamp': '2025-09-10 02:40:08.189711', 'step': 5449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:08.244375', 'step': 5449, 'epoch': 1} {'type': 'loss', 'content': 0.12836965918540955, 'timestamp': '2025-09-10 02:40:08.246363', 'step': 5450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:08.304178', 'step': 5450, 'epoch': 1} {'type': 'loss', 'content': 0.266426146030426, 'timestamp': '2025-09-10 02:40:08.305896', 'step': 5451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:08.361176', 'step': 5451, 'epoch': 1} {'type': 'loss', 'content': 0.13459664583206177, 'timestamp': '2025-09-10 02:40:08.367307', 'step': 5452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:08.421872', 'step': 5452, 'epoch': 1} {'type': 'loss', 'content': 0.1639017015695572, 'timestamp': '2025-09-10 02:40:08.423921', 'step': 5453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:08.478838', 'step': 5453, 'epoch': 1} {'type': 'loss', 'content': 0.28164318203926086, 'timestamp': '2025-09-10 02:40:08.480787', 'step': 5454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:08.534893', 'step': 5454, 'epoch': 1} {'type': 'loss', 'content': 0.13799583911895752, 'timestamp': '2025-09-10 02:40:08.536938', 'step': 5455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:08.590456', 'step': 5455, 'epoch': 1} {'type': 'loss', 'content': 0.09329551458358765, 'timestamp': '2025-09-10 02:40:08.596492', 'step': 5456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:08.650153', 'step': 5456, 'epoch': 1} {'type': 'loss', 'content': 0.1154542863368988, 'timestamp': '2025-09-10 02:40:08.652274', 'step': 5457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:08.707871', 'step': 5457, 'epoch': 1} {'type': 'loss', 'content': 0.24872900545597076, 'timestamp': '2025-09-10 02:40:08.710133', 'step': 5458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:08.766192', 'step': 5458, 'epoch': 1} {'type': 'loss', 'content': 0.14533960819244385, 'timestamp': '2025-09-10 02:40:08.768387', 'step': 5459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:08.824505', 'step': 5459, 'epoch': 1} {'type': 'loss', 'content': 0.1842241734266281, 'timestamp': '2025-09-10 02:40:08.830720', 'step': 5460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:08.884413', 'step': 5460, 'epoch': 1} {'type': 'loss', 'content': 0.152273029088974, 'timestamp': '2025-09-10 02:40:08.886429', 'step': 5461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:08.942344', 'step': 5461, 'epoch': 1} {'type': 'loss', 'content': 0.14520230889320374, 'timestamp': '2025-09-10 02:40:08.944301', 'step': 5462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:08.999135', 'step': 5462, 'epoch': 1} {'type': 'loss', 'content': 0.15771837532520294, 'timestamp': '2025-09-10 02:40:09.001066', 'step': 5463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:09.055027', 'step': 5463, 'epoch': 1} {'type': 'loss', 'content': 0.0924525037407875, 'timestamp': '2025-09-10 02:40:09.061260', 'step': 5464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:09.116444', 'step': 5464, 'epoch': 1} {'type': 'loss', 'content': 0.0915374904870987, 'timestamp': '2025-09-10 02:40:09.118393', 'step': 5465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:09.177094', 'step': 5465, 'epoch': 1} {'type': 'loss', 'content': 0.179282084107399, 'timestamp': '2025-09-10 02:40:09.179100', 'step': 5466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:09.237688', 'step': 5466, 'epoch': 1} {'type': 'loss', 'content': 0.15860088169574738, 'timestamp': '2025-09-10 02:40:09.239714', 'step': 5467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:09.302504', 'step': 5467, 'epoch': 1} {'type': 'loss', 'content': 0.21469640731811523, 'timestamp': '2025-09-10 02:40:09.309523', 'step': 5468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:09.367339', 'step': 5468, 'epoch': 1} {'type': 'loss', 'content': 0.12426166981458664, 'timestamp': '2025-09-10 02:40:09.369322', 'step': 5469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:09.426961', 'step': 5469, 'epoch': 1} {'type': 'loss', 'content': 0.25109994411468506, 'timestamp': '2025-09-10 02:40:09.429049', 'step': 5470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:09.487270', 'step': 5470, 'epoch': 1} {'type': 'loss', 'content': 0.1446380913257599, 'timestamp': '2025-09-10 02:40:09.489470', 'step': 5471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:09.549394', 'step': 5471, 'epoch': 1} {'type': 'loss', 'content': 0.1679612547159195, 'timestamp': '2025-09-10 02:40:09.556518', 'step': 5472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:09.615775', 'step': 5472, 'epoch': 1} {'type': 'loss', 'content': 0.1283307820558548, 'timestamp': '2025-09-10 02:40:09.618149', 'step': 5473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:09.679571', 'step': 5473, 'epoch': 1} {'type': 'loss', 'content': 0.1769849956035614, 'timestamp': '2025-09-10 02:40:09.681627', 'step': 5474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:09.741739', 'step': 5474, 'epoch': 1} {'type': 'loss', 'content': 0.09920382499694824, 'timestamp': '2025-09-10 02:40:09.743747', 'step': 5475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:09.800587', 'step': 5475, 'epoch': 1} {'type': 'loss', 'content': 0.105500228703022, 'timestamp': '2025-09-10 02:40:09.807172', 'step': 5476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:09.862777', 'step': 5476, 'epoch': 1} {'type': 'loss', 'content': 0.24097466468811035, 'timestamp': '2025-09-10 02:40:09.864830', 'step': 5477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:09.921548', 'step': 5477, 'epoch': 1} {'type': 'loss', 'content': 0.24864676594734192, 'timestamp': '2025-09-10 02:40:09.927245', 'step': 5478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:09.985056', 'step': 5478, 'epoch': 1} {'type': 'loss', 'content': 0.16362330317497253, 'timestamp': '2025-09-10 02:40:09.987042', 'step': 5479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:10.043772', 'step': 5479, 'epoch': 1} {'type': 'loss', 'content': 0.1114456057548523, 'timestamp': '2025-09-10 02:40:10.050118', 'step': 5480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:10.105126', 'step': 5480, 'epoch': 1} {'type': 'loss', 'content': 0.2099023312330246, 'timestamp': '2025-09-10 02:40:10.106849', 'step': 5481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:10.162905', 'step': 5481, 'epoch': 1} {'type': 'loss', 'content': 0.10804403573274612, 'timestamp': '2025-09-10 02:40:10.164958', 'step': 5482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:10.221592', 'step': 5482, 'epoch': 1} {'type': 'loss', 'content': 0.15174837410449982, 'timestamp': '2025-09-10 02:40:10.223487', 'step': 5483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:10.281018', 'step': 5483, 'epoch': 1} {'type': 'loss', 'content': 0.19009007513523102, 'timestamp': '2025-09-10 02:40:10.287503', 'step': 5484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:10.348347', 'step': 5484, 'epoch': 1} {'type': 'loss', 'content': 0.11082500964403152, 'timestamp': '2025-09-10 02:40:10.356856', 'step': 5485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:10.428163', 'step': 5485, 'epoch': 1} {'type': 'loss', 'content': 0.2334037572145462, 'timestamp': '2025-09-10 02:40:10.430305', 'step': 5486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:10.488315', 'step': 5486, 'epoch': 1} {'type': 'loss', 'content': 0.1653904765844345, 'timestamp': '2025-09-10 02:40:10.490308', 'step': 5487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:10.547360', 'step': 5487, 'epoch': 1} {'type': 'loss', 'content': 0.1574331670999527, 'timestamp': '2025-09-10 02:40:10.553898', 'step': 5488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:10.612795', 'step': 5488, 'epoch': 1} {'type': 'loss', 'content': 0.08708127588033676, 'timestamp': '2025-09-10 02:40:10.614964', 'step': 5489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:10.678372', 'step': 5489, 'epoch': 1} {'type': 'loss', 'content': 0.14332710206508636, 'timestamp': '2025-09-10 02:40:10.680463', 'step': 5490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:10.740932', 'step': 5490, 'epoch': 1} {'type': 'loss', 'content': 0.0978974923491478, 'timestamp': '2025-09-10 02:40:10.743134', 'step': 5491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:10.800493', 'step': 5491, 'epoch': 1} {'type': 'loss', 'content': 0.1075625866651535, 'timestamp': '2025-09-10 02:40:10.807110', 'step': 5492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:10.863685', 'step': 5492, 'epoch': 1} {'type': 'loss', 'content': 0.1997612565755844, 'timestamp': '2025-09-10 02:40:10.865970', 'step': 5493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:10.923976', 'step': 5493, 'epoch': 1} {'type': 'loss', 'content': 0.13648714125156403, 'timestamp': '2025-09-10 02:40:10.926122', 'step': 5494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:10.984374', 'step': 5494, 'epoch': 1} {'type': 'loss', 'content': 0.14832332730293274, 'timestamp': '2025-09-10 02:40:10.986473', 'step': 5495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:11.043744', 'step': 5495, 'epoch': 1} {'type': 'loss', 'content': 0.21041439473628998, 'timestamp': '2025-09-10 02:40:11.050160', 'step': 5496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:11.105641', 'step': 5496, 'epoch': 1} {'type': 'loss', 'content': 0.11669279634952545, 'timestamp': '2025-09-10 02:40:11.107501', 'step': 5497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:11.163371', 'step': 5497, 'epoch': 1} {'type': 'loss', 'content': 0.13392339646816254, 'timestamp': '2025-09-10 02:40:11.165501', 'step': 5498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:11.223107', 'step': 5498, 'epoch': 1} {'type': 'loss', 'content': 0.17147968709468842, 'timestamp': '2025-09-10 02:40:11.224989', 'step': 5499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:11.280575', 'step': 5499, 'epoch': 1} {'type': 'loss', 'content': 0.19862711429595947, 'timestamp': '2025-09-10 02:40:11.286886', 'step': 5500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 5500', 'timestamp': '2025-09-10 02:40:11.681335', 'step': 5500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:11.742163', 'step': 5500, 'epoch': 1} {'type': 'loss', 'content': 0.18190595507621765, 'timestamp': '2025-09-10 02:40:11.744504', 'step': 5501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:11.802034', 'step': 5501, 'epoch': 1} {'type': 'loss', 'content': 0.26981040835380554, 'timestamp': '2025-09-10 02:40:11.804344', 'step': 5502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:11.862066', 'step': 5502, 'epoch': 1} {'type': 'loss', 'content': 0.031186241656541824, 'timestamp': '2025-09-10 02:40:11.864273', 'step': 5503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:11.921961', 'step': 5503, 'epoch': 1} {'type': 'loss', 'content': 0.12413261830806732, 'timestamp': '2025-09-10 02:40:11.928442', 'step': 5504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:11.985394', 'step': 5504, 'epoch': 1} {'type': 'loss', 'content': 0.1253562867641449, 'timestamp': '2025-09-10 02:40:11.987741', 'step': 5505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:12.047577', 'step': 5505, 'epoch': 1} {'type': 'loss', 'content': 0.2092142552137375, 'timestamp': '2025-09-10 02:40:12.050159', 'step': 5506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:12.111444', 'step': 5506, 'epoch': 1} {'type': 'loss', 'content': 0.19851182401180267, 'timestamp': '2025-09-10 02:40:12.113778', 'step': 5507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:12.174894', 'step': 5507, 'epoch': 1} {'type': 'loss', 'content': 0.16437959671020508, 'timestamp': '2025-09-10 02:40:12.182163', 'step': 5508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:12.240511', 'step': 5508, 'epoch': 1} {'type': 'loss', 'content': 0.15048792958259583, 'timestamp': '2025-09-10 02:40:12.242574', 'step': 5509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:12.297029', 'step': 5509, 'epoch': 1} {'type': 'loss', 'content': 0.18386989831924438, 'timestamp': '2025-09-10 02:40:12.298962', 'step': 5510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:12.352648', 'step': 5510, 'epoch': 1} {'type': 'loss', 'content': 0.18769019842147827, 'timestamp': '2025-09-10 02:40:12.354860', 'step': 5511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:12.408906', 'step': 5511, 'epoch': 1} {'type': 'loss', 'content': 0.13062027096748352, 'timestamp': '2025-09-10 02:40:12.415030', 'step': 5512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:12.467898', 'step': 5512, 'epoch': 1} {'type': 'loss', 'content': 0.21521395444869995, 'timestamp': '2025-09-10 02:40:12.470245', 'step': 5513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:12.523659', 'step': 5513, 'epoch': 1} {'type': 'loss', 'content': 0.14746065437793732, 'timestamp': '2025-09-10 02:40:12.525550', 'step': 5514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:12.580327', 'step': 5514, 'epoch': 1} {'type': 'loss', 'content': 0.14659333229064941, 'timestamp': '2025-09-10 02:40:12.582968', 'step': 5515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:12.638676', 'step': 5515, 'epoch': 1} {'type': 'loss', 'content': 0.09968514740467072, 'timestamp': '2025-09-10 02:40:12.645093', 'step': 5516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:12.699456', 'step': 5516, 'epoch': 1} {'type': 'loss', 'content': 0.1512276530265808, 'timestamp': '2025-09-10 02:40:12.701468', 'step': 5517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:12.756152', 'step': 5517, 'epoch': 1} {'type': 'loss', 'content': 0.17647679150104523, 'timestamp': '2025-09-10 02:40:12.758414', 'step': 5518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:12.813317', 'step': 5518, 'epoch': 1} {'type': 'loss', 'content': 0.11585954576730728, 'timestamp': '2025-09-10 02:40:12.815382', 'step': 5519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:12.868484', 'step': 5519, 'epoch': 1} {'type': 'loss', 'content': 0.11380190402269363, 'timestamp': '2025-09-10 02:40:12.874477', 'step': 5520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:12.926269', 'step': 5520, 'epoch': 1} {'type': 'loss', 'content': 0.20091500878334045, 'timestamp': '2025-09-10 02:40:12.928353', 'step': 5521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:12.981626', 'step': 5521, 'epoch': 1} {'type': 'loss', 'content': 0.10177763551473618, 'timestamp': '2025-09-10 02:40:12.983687', 'step': 5522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:13.037414', 'step': 5522, 'epoch': 1} {'type': 'loss', 'content': 0.16224312782287598, 'timestamp': '2025-09-10 02:40:13.039429', 'step': 5523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:13.092869', 'step': 5523, 'epoch': 1} {'type': 'loss', 'content': 0.16285720467567444, 'timestamp': '2025-09-10 02:40:13.099214', 'step': 5524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:13.152476', 'step': 5524, 'epoch': 1} {'type': 'loss', 'content': 0.10899282991886139, 'timestamp': '2025-09-10 02:40:13.154619', 'step': 5525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:13.208007', 'step': 5525, 'epoch': 1} {'type': 'loss', 'content': 0.2189241498708725, 'timestamp': '2025-09-10 02:40:13.210002', 'step': 5526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:13.263010', 'step': 5526, 'epoch': 1} {'type': 'loss', 'content': 0.15959496796131134, 'timestamp': '2025-09-10 02:40:13.265044', 'step': 5527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:13.317694', 'step': 5527, 'epoch': 1} {'type': 'loss', 'content': 0.16298723220825195, 'timestamp': '2025-09-10 02:40:13.323408', 'step': 5528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:13.375594', 'step': 5528, 'epoch': 1} {'type': 'loss', 'content': 0.08819980919361115, 'timestamp': '2025-09-10 02:40:13.377728', 'step': 5529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:13.431318', 'step': 5529, 'epoch': 1} {'type': 'loss', 'content': 0.15139766037464142, 'timestamp': '2025-09-10 02:40:13.433546', 'step': 5530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:13.486497', 'step': 5530, 'epoch': 1} {'type': 'loss', 'content': 0.15231993794441223, 'timestamp': '2025-09-10 02:40:13.488653', 'step': 5531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:13.541735', 'step': 5531, 'epoch': 1} {'type': 'loss', 'content': 0.22162723541259766, 'timestamp': '2025-09-10 02:40:13.547567', 'step': 5532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:13.599758', 'step': 5532, 'epoch': 1} {'type': 'loss', 'content': 0.16661515831947327, 'timestamp': '2025-09-10 02:40:13.601919', 'step': 5533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:13.657136', 'step': 5533, 'epoch': 1} {'type': 'loss', 'content': 0.27440759539604187, 'timestamp': '2025-09-10 02:40:13.659325', 'step': 5534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:13.712684', 'step': 5534, 'epoch': 1} {'type': 'loss', 'content': 0.20030677318572998, 'timestamp': '2025-09-10 02:40:13.714827', 'step': 5535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:13.767910', 'step': 5535, 'epoch': 1} {'type': 'loss', 'content': 0.1601043939590454, 'timestamp': '2025-09-10 02:40:13.773751', 'step': 5536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:13.827572', 'step': 5536, 'epoch': 1} {'type': 'loss', 'content': 0.13952787220478058, 'timestamp': '2025-09-10 02:40:13.829606', 'step': 5537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:13.882656', 'step': 5537, 'epoch': 1} {'type': 'loss', 'content': 0.18159079551696777, 'timestamp': '2025-09-10 02:40:13.884665', 'step': 5538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:13.938120', 'step': 5538, 'epoch': 1} {'type': 'loss', 'content': 0.18726539611816406, 'timestamp': '2025-09-10 02:40:13.940373', 'step': 5539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:13.994030', 'step': 5539, 'epoch': 1} {'type': 'loss', 'content': 0.1553991585969925, 'timestamp': '2025-09-10 02:40:14.000125', 'step': 5540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:14.052789', 'step': 5540, 'epoch': 1} {'type': 'loss', 'content': 0.12885579466819763, 'timestamp': '2025-09-10 02:40:14.054906', 'step': 5541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:14.107850', 'step': 5541, 'epoch': 1} {'type': 'loss', 'content': 0.1494605988264084, 'timestamp': '2025-09-10 02:40:14.110189', 'step': 5542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:14.163671', 'step': 5542, 'epoch': 1} {'type': 'loss', 'content': 0.14588624238967896, 'timestamp': '2025-09-10 02:40:14.166019', 'step': 5543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:14.220360', 'step': 5543, 'epoch': 1} {'type': 'loss', 'content': 0.1580665409564972, 'timestamp': '2025-09-10 02:40:14.226696', 'step': 5544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:14.279344', 'step': 5544, 'epoch': 1} {'type': 'loss', 'content': 0.11892128735780716, 'timestamp': '2025-09-10 02:40:14.281762', 'step': 5545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:14.334854', 'step': 5545, 'epoch': 1} {'type': 'loss', 'content': 0.20554611086845398, 'timestamp': '2025-09-10 02:40:14.336886', 'step': 5546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:14.389932', 'step': 5546, 'epoch': 1} {'type': 'loss', 'content': 0.14885973930358887, 'timestamp': '2025-09-10 02:40:14.392002', 'step': 5547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:14.445975', 'step': 5547, 'epoch': 1} {'type': 'loss', 'content': 0.17551645636558533, 'timestamp': '2025-09-10 02:40:14.451859', 'step': 5548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:14.504430', 'step': 5548, 'epoch': 1} {'type': 'loss', 'content': 0.16719874739646912, 'timestamp': '2025-09-10 02:40:14.506444', 'step': 5549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:14.559372', 'step': 5549, 'epoch': 1} {'type': 'loss', 'content': 0.1445275992155075, 'timestamp': '2025-09-10 02:40:14.561392', 'step': 5550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:14.614326', 'step': 5550, 'epoch': 1} {'type': 'loss', 'content': 0.28406980633735657, 'timestamp': '2025-09-10 02:40:14.616512', 'step': 5551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:14.670498', 'step': 5551, 'epoch': 1} {'type': 'loss', 'content': 0.22109954059123993, 'timestamp': '2025-09-10 02:40:14.676365', 'step': 5552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:14.731177', 'step': 5552, 'epoch': 1} {'type': 'loss', 'content': 0.27803274989128113, 'timestamp': '2025-09-10 02:40:14.733297', 'step': 5553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:14.786668', 'step': 5553, 'epoch': 1} {'type': 'loss', 'content': 0.17127291858196259, 'timestamp': '2025-09-10 02:40:14.788705', 'step': 5554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:14.842380', 'step': 5554, 'epoch': 1} {'type': 'loss', 'content': 0.13947221636772156, 'timestamp': '2025-09-10 02:40:14.844571', 'step': 5555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:14.897575', 'step': 5555, 'epoch': 1} {'type': 'loss', 'content': 0.1368904560804367, 'timestamp': '2025-09-10 02:40:14.903559', 'step': 5556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:14.959905', 'step': 5556, 'epoch': 1} {'type': 'loss', 'content': 0.231467142701149, 'timestamp': '2025-09-10 02:40:14.961926', 'step': 5557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:15.018092', 'step': 5557, 'epoch': 1} {'type': 'loss', 'content': 0.1847180724143982, 'timestamp': '2025-09-10 02:40:15.020184', 'step': 5558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:15.076694', 'step': 5558, 'epoch': 1} {'type': 'loss', 'content': 0.12990033626556396, 'timestamp': '2025-09-10 02:40:15.078862', 'step': 5559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:15.137011', 'step': 5559, 'epoch': 1} {'type': 'loss', 'content': 0.09481314569711685, 'timestamp': '2025-09-10 02:40:15.143190', 'step': 5560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:15.198447', 'step': 5560, 'epoch': 1} {'type': 'loss', 'content': 0.12585794925689697, 'timestamp': '2025-09-10 02:40:15.200539', 'step': 5561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:15.255690', 'step': 5561, 'epoch': 1} {'type': 'loss', 'content': 0.10237528383731842, 'timestamp': '2025-09-10 02:40:15.257755', 'step': 5562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:15.312100', 'step': 5562, 'epoch': 1} {'type': 'loss', 'content': 0.23564112186431885, 'timestamp': '2025-09-10 02:40:15.314090', 'step': 5563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:15.366973', 'step': 5563, 'epoch': 1} {'type': 'loss', 'content': 0.12902508676052094, 'timestamp': '2025-09-10 02:40:15.372926', 'step': 5564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:15.425928', 'step': 5564, 'epoch': 1} {'type': 'loss', 'content': 0.1484438180923462, 'timestamp': '2025-09-10 02:40:15.427954', 'step': 5565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:15.481209', 'step': 5565, 'epoch': 1} {'type': 'loss', 'content': 0.25229743123054504, 'timestamp': '2025-09-10 02:40:15.483229', 'step': 5566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:15.535974', 'step': 5566, 'epoch': 1} {'type': 'loss', 'content': 0.159399151802063, 'timestamp': '2025-09-10 02:40:15.538092', 'step': 5567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:15.590989', 'step': 5567, 'epoch': 1} {'type': 'loss', 'content': 0.18318390846252441, 'timestamp': '2025-09-10 02:40:15.596846', 'step': 5568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:15.648832', 'step': 5568, 'epoch': 1} {'type': 'loss', 'content': 0.2481137216091156, 'timestamp': '2025-09-10 02:40:15.651024', 'step': 5569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:15.704403', 'step': 5569, 'epoch': 1} {'type': 'loss', 'content': 0.1542321741580963, 'timestamp': '2025-09-10 02:40:15.706458', 'step': 5570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:15.759387', 'step': 5570, 'epoch': 1} {'type': 'loss', 'content': 0.19785554707050323, 'timestamp': '2025-09-10 02:40:15.761543', 'step': 5571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:15.815871', 'step': 5571, 'epoch': 1} {'type': 'loss', 'content': 0.11476746946573257, 'timestamp': '2025-09-10 02:40:15.821842', 'step': 5572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:15.875292', 'step': 5572, 'epoch': 1} {'type': 'loss', 'content': 0.09337200969457626, 'timestamp': '2025-09-10 02:40:15.877473', 'step': 5573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:15.930497', 'step': 5573, 'epoch': 1} {'type': 'loss', 'content': 0.1748218983411789, 'timestamp': '2025-09-10 02:40:15.932767', 'step': 5574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:15.986399', 'step': 5574, 'epoch': 1} {'type': 'loss', 'content': 0.12098187953233719, 'timestamp': '2025-09-10 02:40:15.988537', 'step': 5575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:16.041495', 'step': 5575, 'epoch': 1} {'type': 'loss', 'content': 0.14272688329219818, 'timestamp': '2025-09-10 02:40:16.047274', 'step': 5576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:16.099262', 'step': 5576, 'epoch': 1} {'type': 'loss', 'content': 0.16862066090106964, 'timestamp': '2025-09-10 02:40:16.101311', 'step': 5577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:16.154015', 'step': 5577, 'epoch': 1} {'type': 'loss', 'content': 0.13564594089984894, 'timestamp': '2025-09-10 02:40:16.156185', 'step': 5578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:16.209261', 'step': 5578, 'epoch': 1} {'type': 'loss', 'content': 0.12687917053699493, 'timestamp': '2025-09-10 02:40:16.211353', 'step': 5579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:16.264350', 'step': 5579, 'epoch': 1} {'type': 'loss', 'content': 0.1883762925863266, 'timestamp': '2025-09-10 02:40:16.270111', 'step': 5580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:16.324508', 'step': 5580, 'epoch': 1} {'type': 'loss', 'content': 0.12027032673358917, 'timestamp': '2025-09-10 02:40:16.326647', 'step': 5581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:16.379839', 'step': 5581, 'epoch': 1} {'type': 'loss', 'content': 0.1205640435218811, 'timestamp': '2025-09-10 02:40:16.381917', 'step': 5582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:16.435280', 'step': 5582, 'epoch': 1} {'type': 'loss', 'content': 0.11123470216989517, 'timestamp': '2025-09-10 02:40:16.437308', 'step': 5583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:16.491170', 'step': 5583, 'epoch': 1} {'type': 'loss', 'content': 0.15408557653427124, 'timestamp': '2025-09-10 02:40:16.497056', 'step': 5584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:16.549279', 'step': 5584, 'epoch': 1} {'type': 'loss', 'content': 0.2607886493206024, 'timestamp': '2025-09-10 02:40:16.551441', 'step': 5585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:16.604680', 'step': 5585, 'epoch': 1} {'type': 'loss', 'content': 0.08457502722740173, 'timestamp': '2025-09-10 02:40:16.606653', 'step': 5586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:16.659986', 'step': 5586, 'epoch': 1} {'type': 'loss', 'content': 0.0937516838312149, 'timestamp': '2025-09-10 02:40:16.662137', 'step': 5587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:16.715980', 'step': 5587, 'epoch': 1} {'type': 'loss', 'content': 0.10197796672582626, 'timestamp': '2025-09-10 02:40:16.722089', 'step': 5588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:16.775404', 'step': 5588, 'epoch': 1} {'type': 'loss', 'content': 0.14402727782726288, 'timestamp': '2025-09-10 02:40:16.778064', 'step': 5589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:16.835351', 'step': 5589, 'epoch': 1} {'type': 'loss', 'content': 0.10464967787265778, 'timestamp': '2025-09-10 02:40:16.838026', 'step': 5590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:16.895943', 'step': 5590, 'epoch': 1} {'type': 'loss', 'content': 0.20851872861385345, 'timestamp': '2025-09-10 02:40:16.898584', 'step': 5591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:16.953369', 'step': 5591, 'epoch': 1} {'type': 'loss', 'content': 0.09353883564472198, 'timestamp': '2025-09-10 02:40:16.959625', 'step': 5592, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:40:29.623986', 'step': 5592, 'epoch': 1} {'type': 'pplx', 'content': 14490.32662914042, 'timestamp': '2025-09-10 02:40:29.626874', 'step': 5592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:29.680217', 'step': 5592, 'epoch': 1} {'type': 'loss', 'content': 0.13928239047527313, 'timestamp': '2025-09-10 02:40:29.682357', 'step': 5593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:29.735609', 'step': 5593, 'epoch': 1} {'type': 'loss', 'content': 0.12271222472190857, 'timestamp': '2025-09-10 02:40:29.737835', 'step': 5594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:29.790533', 'step': 5594, 'epoch': 1} {'type': 'loss', 'content': 0.17845728993415833, 'timestamp': '2025-09-10 02:40:29.792997', 'step': 5595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:29.845950', 'step': 5595, 'epoch': 1} {'type': 'loss', 'content': 0.19075076282024384, 'timestamp': '2025-09-10 02:40:29.853710', 'step': 5596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:29.906301', 'step': 5596, 'epoch': 1} {'type': 'loss', 'content': 0.3041199743747711, 'timestamp': '2025-09-10 02:40:29.908658', 'step': 5597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:29.963012', 'step': 5597, 'epoch': 1} {'type': 'loss', 'content': 0.11092815548181534, 'timestamp': '2025-09-10 02:40:29.965320', 'step': 5598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:30.018303', 'step': 5598, 'epoch': 1} {'type': 'loss', 'content': 0.15267091989517212, 'timestamp': '2025-09-10 02:40:30.020585', 'step': 5599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:30.073493', 'step': 5599, 'epoch': 1} {'type': 'loss', 'content': 0.17509974539279938, 'timestamp': '2025-09-10 02:40:30.079508', 'step': 5600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:30.131855', 'step': 5600, 'epoch': 1} {'type': 'loss', 'content': 0.17878679931163788, 'timestamp': '2025-09-10 02:40:30.134082', 'step': 5601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:30.186746', 'step': 5601, 'epoch': 1} {'type': 'loss', 'content': 0.17218686640262604, 'timestamp': '2025-09-10 02:40:30.188917', 'step': 5602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:30.242191', 'step': 5602, 'epoch': 1} {'type': 'loss', 'content': 0.2224690467119217, 'timestamp': '2025-09-10 02:40:30.244603', 'step': 5603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:30.297102', 'step': 5603, 'epoch': 1} {'type': 'loss', 'content': 0.15692786872386932, 'timestamp': '2025-09-10 02:40:30.303001', 'step': 5604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:30.355252', 'step': 5604, 'epoch': 1} {'type': 'loss', 'content': 0.17004385590553284, 'timestamp': '2025-09-10 02:40:30.357530', 'step': 5605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:30.410491', 'step': 5605, 'epoch': 1} {'type': 'loss', 'content': 0.06566518545150757, 'timestamp': '2025-09-10 02:40:30.412905', 'step': 5606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:30.467614', 'step': 5606, 'epoch': 1} {'type': 'loss', 'content': 0.18119387328624725, 'timestamp': '2025-09-10 02:40:30.469587', 'step': 5607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:30.524092', 'step': 5607, 'epoch': 1} {'type': 'loss', 'content': 0.22068603336811066, 'timestamp': '2025-09-10 02:40:30.529897', 'step': 5608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:30.582602', 'step': 5608, 'epoch': 1} {'type': 'loss', 'content': 0.12828251719474792, 'timestamp': '2025-09-10 02:40:30.584564', 'step': 5609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:30.637778', 'step': 5609, 'epoch': 1} {'type': 'loss', 'content': 0.08943447470664978, 'timestamp': '2025-09-10 02:40:30.639805', 'step': 5610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:30.692814', 'step': 5610, 'epoch': 1} {'type': 'loss', 'content': 0.09125884622335434, 'timestamp': '2025-09-10 02:40:30.695238', 'step': 5611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:30.757416', 'step': 5611, 'epoch': 1} {'type': 'loss', 'content': 0.14361797273159027, 'timestamp': '2025-09-10 02:40:30.763711', 'step': 5612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:30.815922', 'step': 5612, 'epoch': 1} {'type': 'loss', 'content': 0.13066814839839935, 'timestamp': '2025-09-10 02:40:30.818477', 'step': 5613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:30.871519', 'step': 5613, 'epoch': 1} {'type': 'loss', 'content': 0.18326996266841888, 'timestamp': '2025-09-10 02:40:30.873848', 'step': 5614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:30.926965', 'step': 5614, 'epoch': 1} {'type': 'loss', 'content': 0.16486816108226776, 'timestamp': '2025-09-10 02:40:30.929245', 'step': 5615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:30.984137', 'step': 5615, 'epoch': 1} {'type': 'loss', 'content': 0.12375018000602722, 'timestamp': '2025-09-10 02:40:30.989859', 'step': 5616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:31.042997', 'step': 5616, 'epoch': 1} {'type': 'loss', 'content': 0.14731386303901672, 'timestamp': '2025-09-10 02:40:31.045207', 'step': 5617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:31.098081', 'step': 5617, 'epoch': 1} {'type': 'loss', 'content': 0.20909495651721954, 'timestamp': '2025-09-10 02:40:31.100121', 'step': 5618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:31.153113', 'step': 5618, 'epoch': 1} {'type': 'loss', 'content': 0.13894933462142944, 'timestamp': '2025-09-10 02:40:31.155400', 'step': 5619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:31.208020', 'step': 5619, 'epoch': 1} {'type': 'loss', 'content': 0.20247626304626465, 'timestamp': '2025-09-10 02:40:31.213950', 'step': 5620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:31.266116', 'step': 5620, 'epoch': 1} {'type': 'loss', 'content': 0.29408860206604004, 'timestamp': '2025-09-10 02:40:31.268635', 'step': 5621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:31.322319', 'step': 5621, 'epoch': 1} {'type': 'loss', 'content': 0.18974569439888, 'timestamp': '2025-09-10 02:40:31.324523', 'step': 5622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:31.377569', 'step': 5622, 'epoch': 1} {'type': 'loss', 'content': 0.15118053555488586, 'timestamp': '2025-09-10 02:40:31.379807', 'step': 5623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:31.433950', 'step': 5623, 'epoch': 1} {'type': 'loss', 'content': 0.17691747844219208, 'timestamp': '2025-09-10 02:40:31.439807', 'step': 5624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:31.493580', 'step': 5624, 'epoch': 1} {'type': 'loss', 'content': 0.1940915733575821, 'timestamp': '2025-09-10 02:40:31.495624', 'step': 5625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:31.549813', 'step': 5625, 'epoch': 1} {'type': 'loss', 'content': 0.23768991231918335, 'timestamp': '2025-09-10 02:40:31.551928', 'step': 5626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:31.605170', 'step': 5626, 'epoch': 1} {'type': 'loss', 'content': 0.31885653734207153, 'timestamp': '2025-09-10 02:40:31.607187', 'step': 5627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:31.661686', 'step': 5627, 'epoch': 1} {'type': 'loss', 'content': 0.1505882740020752, 'timestamp': '2025-09-10 02:40:31.667741', 'step': 5628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:31.720899', 'step': 5628, 'epoch': 1} {'type': 'loss', 'content': 0.17359653115272522, 'timestamp': '2025-09-10 02:40:31.723280', 'step': 5629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:31.776600', 'step': 5629, 'epoch': 1} {'type': 'loss', 'content': 0.23909611999988556, 'timestamp': '2025-09-10 02:40:31.778932', 'step': 5630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:31.831503', 'step': 5630, 'epoch': 1} {'type': 'loss', 'content': 0.17237554490566254, 'timestamp': '2025-09-10 02:40:31.833903', 'step': 5631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:31.887286', 'step': 5631, 'epoch': 1} {'type': 'loss', 'content': 0.16714036464691162, 'timestamp': '2025-09-10 02:40:31.893445', 'step': 5632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:31.946221', 'step': 5632, 'epoch': 1} {'type': 'loss', 'content': 0.167667955160141, 'timestamp': '2025-09-10 02:40:31.948390', 'step': 5633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:32.001924', 'step': 5633, 'epoch': 1} {'type': 'loss', 'content': 0.16661877930164337, 'timestamp': '2025-09-10 02:40:32.004328', 'step': 5634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:32.059562', 'step': 5634, 'epoch': 1} {'type': 'loss', 'content': 0.22431671619415283, 'timestamp': '2025-09-10 02:40:32.062390', 'step': 5635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:32.116838', 'step': 5635, 'epoch': 1} {'type': 'loss', 'content': 0.20092278718948364, 'timestamp': '2025-09-10 02:40:32.123030', 'step': 5636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:32.176099', 'step': 5636, 'epoch': 1} {'type': 'loss', 'content': 0.15550760924816132, 'timestamp': '2025-09-10 02:40:32.178461', 'step': 5637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:32.231545', 'step': 5637, 'epoch': 1} {'type': 'loss', 'content': 0.12819057703018188, 'timestamp': '2025-09-10 02:40:32.233701', 'step': 5638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:32.287719', 'step': 5638, 'epoch': 1} {'type': 'loss', 'content': 0.24242855608463287, 'timestamp': '2025-09-10 02:40:32.290331', 'step': 5639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:32.344100', 'step': 5639, 'epoch': 1} {'type': 'loss', 'content': 0.171528622508049, 'timestamp': '2025-09-10 02:40:32.350200', 'step': 5640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:32.403115', 'step': 5640, 'epoch': 1} {'type': 'loss', 'content': 0.1444922387599945, 'timestamp': '2025-09-10 02:40:32.405464', 'step': 5641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:32.458532', 'step': 5641, 'epoch': 1} {'type': 'loss', 'content': 0.27484726905822754, 'timestamp': '2025-09-10 02:40:32.460650', 'step': 5642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:32.514482', 'step': 5642, 'epoch': 1} {'type': 'loss', 'content': 0.22647057473659515, 'timestamp': '2025-09-10 02:40:32.516889', 'step': 5643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:32.572307', 'step': 5643, 'epoch': 1} {'type': 'loss', 'content': 0.17985455691814423, 'timestamp': '2025-09-10 02:40:32.578316', 'step': 5644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:32.630841', 'step': 5644, 'epoch': 1} {'type': 'loss', 'content': 0.1697397381067276, 'timestamp': '2025-09-10 02:40:32.633114', 'step': 5645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:32.686409', 'step': 5645, 'epoch': 1} {'type': 'loss', 'content': 0.18889032304286957, 'timestamp': '2025-09-10 02:40:32.688673', 'step': 5646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:32.743109', 'step': 5646, 'epoch': 1} {'type': 'loss', 'content': 0.09334300458431244, 'timestamp': '2025-09-10 02:40:32.745616', 'step': 5647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:32.800997', 'step': 5647, 'epoch': 1} {'type': 'loss', 'content': 0.22431953251361847, 'timestamp': '2025-09-10 02:40:32.807178', 'step': 5648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:32.860062', 'step': 5648, 'epoch': 1} {'type': 'loss', 'content': 0.16107356548309326, 'timestamp': '2025-09-10 02:40:32.862145', 'step': 5649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:32.915051', 'step': 5649, 'epoch': 1} {'type': 'loss', 'content': 0.14866884052753448, 'timestamp': '2025-09-10 02:40:32.917118', 'step': 5650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:32.969928', 'step': 5650, 'epoch': 1} {'type': 'loss', 'content': 0.13033081591129303, 'timestamp': '2025-09-10 02:40:32.972090', 'step': 5651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:33.025481', 'step': 5651, 'epoch': 1} {'type': 'loss', 'content': 0.05841460078954697, 'timestamp': '2025-09-10 02:40:33.031174', 'step': 5652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:33.084143', 'step': 5652, 'epoch': 1} {'type': 'loss', 'content': 0.11765368282794952, 'timestamp': '2025-09-10 02:40:33.086501', 'step': 5653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:33.141008', 'step': 5653, 'epoch': 1} {'type': 'loss', 'content': 0.2064434289932251, 'timestamp': '2025-09-10 02:40:33.143589', 'step': 5654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:33.196211', 'step': 5654, 'epoch': 1} {'type': 'loss', 'content': 0.16920670866966248, 'timestamp': '2025-09-10 02:40:33.198779', 'step': 5655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:33.252452', 'step': 5655, 'epoch': 1} {'type': 'loss', 'content': 0.17966631054878235, 'timestamp': '2025-09-10 02:40:33.258488', 'step': 5656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:33.310791', 'step': 5656, 'epoch': 1} {'type': 'loss', 'content': 0.16201572120189667, 'timestamp': '2025-09-10 02:40:33.313389', 'step': 5657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:33.368998', 'step': 5657, 'epoch': 1} {'type': 'loss', 'content': 0.2632392644882202, 'timestamp': '2025-09-10 02:40:33.371022', 'step': 5658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:33.426977', 'step': 5658, 'epoch': 1} {'type': 'loss', 'content': 0.16450761258602142, 'timestamp': '2025-09-10 02:40:33.430302', 'step': 5659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:33.485147', 'step': 5659, 'epoch': 1} {'type': 'loss', 'content': 0.11830717325210571, 'timestamp': '2025-09-10 02:40:33.491142', 'step': 5660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:33.544725', 'step': 5660, 'epoch': 1} {'type': 'loss', 'content': 0.15250389277935028, 'timestamp': '2025-09-10 02:40:33.547006', 'step': 5661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:33.600992', 'step': 5661, 'epoch': 1} {'type': 'loss', 'content': 0.20587027072906494, 'timestamp': '2025-09-10 02:40:33.603168', 'step': 5662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:33.658086', 'step': 5662, 'epoch': 1} {'type': 'loss', 'content': 0.10527442395687103, 'timestamp': '2025-09-10 02:40:33.660337', 'step': 5663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:33.716145', 'step': 5663, 'epoch': 1} {'type': 'loss', 'content': 0.14137178659439087, 'timestamp': '2025-09-10 02:40:33.722427', 'step': 5664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:33.779219', 'step': 5664, 'epoch': 1} {'type': 'loss', 'content': 0.13792571425437927, 'timestamp': '2025-09-10 02:40:33.781457', 'step': 5665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:33.834840', 'step': 5665, 'epoch': 1} {'type': 'loss', 'content': 0.08252642303705215, 'timestamp': '2025-09-10 02:40:33.837096', 'step': 5666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:33.892147', 'step': 5666, 'epoch': 1} {'type': 'loss', 'content': 0.15422436594963074, 'timestamp': '2025-09-10 02:40:33.894346', 'step': 5667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:33.957214', 'step': 5667, 'epoch': 1} {'type': 'loss', 'content': 0.13130666315555573, 'timestamp': '2025-09-10 02:40:33.963283', 'step': 5668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:34.017242', 'step': 5668, 'epoch': 1} {'type': 'loss', 'content': 0.18712367117404938, 'timestamp': '2025-09-10 02:40:34.019812', 'step': 5669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:34.076955', 'step': 5669, 'epoch': 1} {'type': 'loss', 'content': 0.10655694454908371, 'timestamp': '2025-09-10 02:40:34.079194', 'step': 5670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:34.133322', 'step': 5670, 'epoch': 1} {'type': 'loss', 'content': 0.13753867149353027, 'timestamp': '2025-09-10 02:40:34.135556', 'step': 5671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:34.189862', 'step': 5671, 'epoch': 1} {'type': 'loss', 'content': 0.15246400237083435, 'timestamp': '2025-09-10 02:40:34.196253', 'step': 5672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:34.250226', 'step': 5672, 'epoch': 1} {'type': 'loss', 'content': 0.22882497310638428, 'timestamp': '2025-09-10 02:40:34.252462', 'step': 5673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:34.307312', 'step': 5673, 'epoch': 1} {'type': 'loss', 'content': 0.13015201687812805, 'timestamp': '2025-09-10 02:40:34.309663', 'step': 5674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:34.364399', 'step': 5674, 'epoch': 1} {'type': 'loss', 'content': 0.14879778027534485, 'timestamp': '2025-09-10 02:40:34.366451', 'step': 5675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:34.421544', 'step': 5675, 'epoch': 1} {'type': 'loss', 'content': 0.17008022964000702, 'timestamp': '2025-09-10 02:40:34.427512', 'step': 5676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:34.480859', 'step': 5676, 'epoch': 1} {'type': 'loss', 'content': 0.11793292313814163, 'timestamp': '2025-09-10 02:40:34.482808', 'step': 5677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:34.536971', 'step': 5677, 'epoch': 1} {'type': 'loss', 'content': 0.09606263786554337, 'timestamp': '2025-09-10 02:40:34.539353', 'step': 5678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:34.592722', 'step': 5678, 'epoch': 1} {'type': 'loss', 'content': 0.09843233972787857, 'timestamp': '2025-09-10 02:40:34.594949', 'step': 5679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:34.647992', 'step': 5679, 'epoch': 1} {'type': 'loss', 'content': 0.1253420114517212, 'timestamp': '2025-09-10 02:40:34.654344', 'step': 5680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:34.709429', 'step': 5680, 'epoch': 1} {'type': 'loss', 'content': 0.1621427685022354, 'timestamp': '2025-09-10 02:40:34.711611', 'step': 5681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:34.766272', 'step': 5681, 'epoch': 1} {'type': 'loss', 'content': 0.14887189865112305, 'timestamp': '2025-09-10 02:40:34.768412', 'step': 5682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:34.822699', 'step': 5682, 'epoch': 1} {'type': 'loss', 'content': 0.17363382875919342, 'timestamp': '2025-09-10 02:40:34.824748', 'step': 5683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:34.879185', 'step': 5683, 'epoch': 1} {'type': 'loss', 'content': 0.27100080251693726, 'timestamp': '2025-09-10 02:40:34.885615', 'step': 5684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:34.938896', 'step': 5684, 'epoch': 1} {'type': 'loss', 'content': 0.1590474396944046, 'timestamp': '2025-09-10 02:40:34.941781', 'step': 5685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:35.017397', 'step': 5685, 'epoch': 1} {'type': 'loss', 'content': 0.18413908779621124, 'timestamp': '2025-09-10 02:40:35.019619', 'step': 5686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:35.083517', 'step': 5686, 'epoch': 1} {'type': 'loss', 'content': 0.15082499384880066, 'timestamp': '2025-09-10 02:40:35.085816', 'step': 5687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:35.151866', 'step': 5687, 'epoch': 1} {'type': 'loss', 'content': 0.20322313904762268, 'timestamp': '2025-09-10 02:40:35.160738', 'step': 5688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:35.227769', 'step': 5688, 'epoch': 1} {'type': 'loss', 'content': 0.16082480549812317, 'timestamp': '2025-09-10 02:40:35.230155', 'step': 5689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:35.303332', 'step': 5689, 'epoch': 1} {'type': 'loss', 'content': 0.19052767753601074, 'timestamp': '2025-09-10 02:40:35.305707', 'step': 5690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:35.384603', 'step': 5690, 'epoch': 1} {'type': 'loss', 'content': 0.21575337648391724, 'timestamp': '2025-09-10 02:40:35.387229', 'step': 5691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:35.464011', 'step': 5691, 'epoch': 1} {'type': 'loss', 'content': 0.1778954416513443, 'timestamp': '2025-09-10 02:40:35.471004', 'step': 5692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:35.526157', 'step': 5692, 'epoch': 1} {'type': 'loss', 'content': 0.1362484097480774, 'timestamp': '2025-09-10 02:40:35.528608', 'step': 5693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:35.583143', 'step': 5693, 'epoch': 1} {'type': 'loss', 'content': 0.2924884557723999, 'timestamp': '2025-09-10 02:40:35.585579', 'step': 5694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:35.641794', 'step': 5694, 'epoch': 1} {'type': 'loss', 'content': 0.16685813665390015, 'timestamp': '2025-09-10 02:40:35.644321', 'step': 5695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:35.699078', 'step': 5695, 'epoch': 1} {'type': 'loss', 'content': 0.2504727244377136, 'timestamp': '2025-09-10 02:40:35.705762', 'step': 5696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:35.760821', 'step': 5696, 'epoch': 1} {'type': 'loss', 'content': 0.11183124035596848, 'timestamp': '2025-09-10 02:40:35.763380', 'step': 5697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:35.819081', 'step': 5697, 'epoch': 1} {'type': 'loss', 'content': 0.1814042180776596, 'timestamp': '2025-09-10 02:40:35.821587', 'step': 5698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:35.877235', 'step': 5698, 'epoch': 1} {'type': 'loss', 'content': 0.10809449851512909, 'timestamp': '2025-09-10 02:40:35.879754', 'step': 5699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:35.934762', 'step': 5699, 'epoch': 1} {'type': 'loss', 'content': 0.1000475287437439, 'timestamp': '2025-09-10 02:40:35.941391', 'step': 5700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:35.995971', 'step': 5700, 'epoch': 1} {'type': 'loss', 'content': 0.1849052459001541, 'timestamp': '2025-09-10 02:40:35.998138', 'step': 5701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:36.052875', 'step': 5701, 'epoch': 1} {'type': 'loss', 'content': 0.11503613740205765, 'timestamp': '2025-09-10 02:40:36.055201', 'step': 5702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:36.110946', 'step': 5702, 'epoch': 1} {'type': 'loss', 'content': 0.17261676490306854, 'timestamp': '2025-09-10 02:40:36.113196', 'step': 5703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:36.169510', 'step': 5703, 'epoch': 1} {'type': 'loss', 'content': 0.13536040484905243, 'timestamp': '2025-09-10 02:40:36.176206', 'step': 5704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:36.231910', 'step': 5704, 'epoch': 1} {'type': 'loss', 'content': 0.18927013874053955, 'timestamp': '2025-09-10 02:40:36.234065', 'step': 5705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:36.290113', 'step': 5705, 'epoch': 1} {'type': 'loss', 'content': 0.14057402312755585, 'timestamp': '2025-09-10 02:40:36.292647', 'step': 5706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:36.350918', 'step': 5706, 'epoch': 1} {'type': 'loss', 'content': 0.1565617024898529, 'timestamp': '2025-09-10 02:40:36.353190', 'step': 5707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:36.410242', 'step': 5707, 'epoch': 1} {'type': 'loss', 'content': 0.13783852756023407, 'timestamp': '2025-09-10 02:40:36.416953', 'step': 5708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:36.475848', 'step': 5708, 'epoch': 1} {'type': 'loss', 'content': 0.22808092832565308, 'timestamp': '2025-09-10 02:40:36.478070', 'step': 5709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:36.533690', 'step': 5709, 'epoch': 1} {'type': 'loss', 'content': 0.20158016681671143, 'timestamp': '2025-09-10 02:40:36.536197', 'step': 5710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:36.592334', 'step': 5710, 'epoch': 1} {'type': 'loss', 'content': 0.1536923497915268, 'timestamp': '2025-09-10 02:40:36.594583', 'step': 5711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:36.651098', 'step': 5711, 'epoch': 1} {'type': 'loss', 'content': 0.07549263536930084, 'timestamp': '2025-09-10 02:40:36.658097', 'step': 5712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:36.713257', 'step': 5712, 'epoch': 1} {'type': 'loss', 'content': 0.1448492407798767, 'timestamp': '2025-09-10 02:40:36.717188', 'step': 5713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:36.774887', 'step': 5713, 'epoch': 1} {'type': 'loss', 'content': 0.17367832362651825, 'timestamp': '2025-09-10 02:40:36.777114', 'step': 5714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:36.834972', 'step': 5714, 'epoch': 1} {'type': 'loss', 'content': 0.17604216933250427, 'timestamp': '2025-09-10 02:40:36.839240', 'step': 5715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:36.896142', 'step': 5715, 'epoch': 1} {'type': 'loss', 'content': 0.1139640063047409, 'timestamp': '2025-09-10 02:40:36.902887', 'step': 5716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:36.966006', 'step': 5716, 'epoch': 1} {'type': 'loss', 'content': 0.20363299548625946, 'timestamp': '2025-09-10 02:40:36.968598', 'step': 5717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:37.039143', 'step': 5717, 'epoch': 1} {'type': 'loss', 'content': 0.13037163019180298, 'timestamp': '2025-09-10 02:40:37.042691', 'step': 5718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:37.125797', 'step': 5718, 'epoch': 1} {'type': 'loss', 'content': 0.09924490749835968, 'timestamp': '2025-09-10 02:40:37.128339', 'step': 5719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:37.185141', 'step': 5719, 'epoch': 1} {'type': 'loss', 'content': 0.2262554168701172, 'timestamp': '2025-09-10 02:40:37.192160', 'step': 5720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:37.249442', 'step': 5720, 'epoch': 1} {'type': 'loss', 'content': 0.14470277726650238, 'timestamp': '2025-09-10 02:40:37.257022', 'step': 5721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:37.317892', 'step': 5721, 'epoch': 1} {'type': 'loss', 'content': 0.14739665389060974, 'timestamp': '2025-09-10 02:40:37.321843', 'step': 5722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:37.379859', 'step': 5722, 'epoch': 1} {'type': 'loss', 'content': 0.17441709339618683, 'timestamp': '2025-09-10 02:40:37.382324', 'step': 5723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:37.439270', 'step': 5723, 'epoch': 1} {'type': 'loss', 'content': 0.13719500601291656, 'timestamp': '2025-09-10 02:40:37.446243', 'step': 5724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:37.508068', 'step': 5724, 'epoch': 1} {'type': 'loss', 'content': 0.14941388368606567, 'timestamp': '2025-09-10 02:40:37.510509', 'step': 5725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:37.587345', 'step': 5725, 'epoch': 1} {'type': 'loss', 'content': 0.20581816136837006, 'timestamp': '2025-09-10 02:40:37.589917', 'step': 5726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:37.701153', 'step': 5726, 'epoch': 1} {'type': 'loss', 'content': 0.15922491252422333, 'timestamp': '2025-09-10 02:40:37.703598', 'step': 5727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:37.763356', 'step': 5727, 'epoch': 1} {'type': 'loss', 'content': 0.13492467999458313, 'timestamp': '2025-09-10 02:40:37.770240', 'step': 5728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:37.826809', 'step': 5728, 'epoch': 1} {'type': 'loss', 'content': 0.15346433222293854, 'timestamp': '2025-09-10 02:40:37.829472', 'step': 5729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:37.889440', 'step': 5729, 'epoch': 1} {'type': 'loss', 'content': 0.1493545025587082, 'timestamp': '2025-09-10 02:40:37.892018', 'step': 5730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:37.950445', 'step': 5730, 'epoch': 1} {'type': 'loss', 'content': 0.08134090155363083, 'timestamp': '2025-09-10 02:40:37.952977', 'step': 5731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:38.010839', 'step': 5731, 'epoch': 1} {'type': 'loss', 'content': 0.12971672415733337, 'timestamp': '2025-09-10 02:40:38.019154', 'step': 5732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:38.078202', 'step': 5732, 'epoch': 1} {'type': 'loss', 'content': 0.1481531262397766, 'timestamp': '2025-09-10 02:40:38.080572', 'step': 5733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:38.137579', 'step': 5733, 'epoch': 1} {'type': 'loss', 'content': 0.08482227474451065, 'timestamp': '2025-09-10 02:40:38.139854', 'step': 5734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:38.198660', 'step': 5734, 'epoch': 1} {'type': 'loss', 'content': 0.16137439012527466, 'timestamp': '2025-09-10 02:40:38.201187', 'step': 5735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:38.278811', 'step': 5735, 'epoch': 1} {'type': 'loss', 'content': 0.1398795247077942, 'timestamp': '2025-09-10 02:40:38.285616', 'step': 5736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:38.356232', 'step': 5736, 'epoch': 1} {'type': 'loss', 'content': 0.1645096242427826, 'timestamp': '2025-09-10 02:40:38.358547', 'step': 5737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:38.418866', 'step': 5737, 'epoch': 1} {'type': 'loss', 'content': 0.09623270481824875, 'timestamp': '2025-09-10 02:40:38.421371', 'step': 5738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:38.484916', 'step': 5738, 'epoch': 1} {'type': 'loss', 'content': 0.17382146418094635, 'timestamp': '2025-09-10 02:40:38.487402', 'step': 5739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:38.561205', 'step': 5739, 'epoch': 1} {'type': 'loss', 'content': 0.16299255192279816, 'timestamp': '2025-09-10 02:40:38.568370', 'step': 5740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:38.631049', 'step': 5740, 'epoch': 1} {'type': 'loss', 'content': 0.1145024448633194, 'timestamp': '2025-09-10 02:40:38.633777', 'step': 5741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:38.692221', 'step': 5741, 'epoch': 1} {'type': 'loss', 'content': 0.15565121173858643, 'timestamp': '2025-09-10 02:40:38.694796', 'step': 5742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:38.762334', 'step': 5742, 'epoch': 1} {'type': 'loss', 'content': 0.1270039975643158, 'timestamp': '2025-09-10 02:40:38.764733', 'step': 5743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:38.822953', 'step': 5743, 'epoch': 1} {'type': 'loss', 'content': 0.11776001006364822, 'timestamp': '2025-09-10 02:40:38.829824', 'step': 5744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:38.886823', 'step': 5744, 'epoch': 1} {'type': 'loss', 'content': 0.1044863909482956, 'timestamp': '2025-09-10 02:40:38.889374', 'step': 5745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:38.945528', 'step': 5745, 'epoch': 1} {'type': 'loss', 'content': 0.21018511056900024, 'timestamp': '2025-09-10 02:40:38.947909', 'step': 5746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:39.004255', 'step': 5746, 'epoch': 1} {'type': 'loss', 'content': 0.17688274383544922, 'timestamp': '2025-09-10 02:40:39.006739', 'step': 5747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:39.064669', 'step': 5747, 'epoch': 1} {'type': 'loss', 'content': 0.17433419823646545, 'timestamp': '2025-09-10 02:40:39.071410', 'step': 5748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:39.126818', 'step': 5748, 'epoch': 1} {'type': 'loss', 'content': 0.21783970296382904, 'timestamp': '2025-09-10 02:40:39.129185', 'step': 5749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:39.185069', 'step': 5749, 'epoch': 1} {'type': 'loss', 'content': 0.1536119431257248, 'timestamp': '2025-09-10 02:40:39.187444', 'step': 5750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:39.242449', 'step': 5750, 'epoch': 1} {'type': 'loss', 'content': 0.15990714728832245, 'timestamp': '2025-09-10 02:40:39.244656', 'step': 5751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:39.299473', 'step': 5751, 'epoch': 1} {'type': 'loss', 'content': 0.10997957736253738, 'timestamp': '2025-09-10 02:40:39.305945', 'step': 5752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:39.359649', 'step': 5752, 'epoch': 1} {'type': 'loss', 'content': 0.08195990324020386, 'timestamp': '2025-09-10 02:40:39.361881', 'step': 5753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:39.416298', 'step': 5753, 'epoch': 1} {'type': 'loss', 'content': 0.08773329108953476, 'timestamp': '2025-09-10 02:40:39.418565', 'step': 5754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:39.472664', 'step': 5754, 'epoch': 1} {'type': 'loss', 'content': 0.13419893383979797, 'timestamp': '2025-09-10 02:40:39.475372', 'step': 5755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:39.530261', 'step': 5755, 'epoch': 1} {'type': 'loss', 'content': 0.12053568661212921, 'timestamp': '2025-09-10 02:40:39.537502', 'step': 5756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:39.592083', 'step': 5756, 'epoch': 1} {'type': 'loss', 'content': 0.15740391612052917, 'timestamp': '2025-09-10 02:40:39.594768', 'step': 5757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:39.649467', 'step': 5757, 'epoch': 1} {'type': 'loss', 'content': 0.16880591213703156, 'timestamp': '2025-09-10 02:40:39.652264', 'step': 5758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:39.708142', 'step': 5758, 'epoch': 1} {'type': 'loss', 'content': 0.11285079270601273, 'timestamp': '2025-09-10 02:40:39.710592', 'step': 5759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:39.764906', 'step': 5759, 'epoch': 1} {'type': 'loss', 'content': 0.1189250573515892, 'timestamp': '2025-09-10 02:40:39.771181', 'step': 5760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:39.832755', 'step': 5760, 'epoch': 1} {'type': 'loss', 'content': 0.16353413462638855, 'timestamp': '2025-09-10 02:40:39.834990', 'step': 5761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:39.898823', 'step': 5761, 'epoch': 1} {'type': 'loss', 'content': 0.14668583869934082, 'timestamp': '2025-09-10 02:40:39.901355', 'step': 5762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:39.963939', 'step': 5762, 'epoch': 1} {'type': 'loss', 'content': 0.07392630726099014, 'timestamp': '2025-09-10 02:40:39.966192', 'step': 5763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:40.030670', 'step': 5763, 'epoch': 1} {'type': 'loss', 'content': 0.11964656412601471, 'timestamp': '2025-09-10 02:40:40.037179', 'step': 5764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:40.101115', 'step': 5764, 'epoch': 1} {'type': 'loss', 'content': 0.3379597067832947, 'timestamp': '2025-09-10 02:40:40.103439', 'step': 5765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:40.162090', 'step': 5765, 'epoch': 1} {'type': 'loss', 'content': 0.18448056280612946, 'timestamp': '2025-09-10 02:40:40.164548', 'step': 5766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:40.219932', 'step': 5766, 'epoch': 1} {'type': 'loss', 'content': 0.180422842502594, 'timestamp': '2025-09-10 02:40:40.222303', 'step': 5767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:40.277469', 'step': 5767, 'epoch': 1} {'type': 'loss', 'content': 0.17136120796203613, 'timestamp': '2025-09-10 02:40:40.283880', 'step': 5768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:40.343160', 'step': 5768, 'epoch': 1} {'type': 'loss', 'content': 0.14235807955265045, 'timestamp': '2025-09-10 02:40:40.345697', 'step': 5769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:40.400422', 'step': 5769, 'epoch': 1} {'type': 'loss', 'content': 0.19081304967403412, 'timestamp': '2025-09-10 02:40:40.402826', 'step': 5770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:40.457204', 'step': 5770, 'epoch': 1} {'type': 'loss', 'content': 0.2119550108909607, 'timestamp': '2025-09-10 02:40:40.460991', 'step': 5771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:40.518084', 'step': 5771, 'epoch': 1} {'type': 'loss', 'content': 0.3001287579536438, 'timestamp': '2025-09-10 02:40:40.524716', 'step': 5772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:40.580722', 'step': 5772, 'epoch': 1} {'type': 'loss', 'content': 0.19379310309886932, 'timestamp': '2025-09-10 02:40:40.583325', 'step': 5773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:40.639630', 'step': 5773, 'epoch': 1} {'type': 'loss', 'content': 0.16202935576438904, 'timestamp': '2025-09-10 02:40:40.641841', 'step': 5774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:40.698216', 'step': 5774, 'epoch': 1} {'type': 'loss', 'content': 0.1883753091096878, 'timestamp': '2025-09-10 02:40:40.700502', 'step': 5775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:40.756588', 'step': 5775, 'epoch': 1} {'type': 'loss', 'content': 0.13395313918590546, 'timestamp': '2025-09-10 02:40:40.763487', 'step': 5776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:40.821797', 'step': 5776, 'epoch': 1} {'type': 'loss', 'content': 0.10958650708198547, 'timestamp': '2025-09-10 02:40:40.824096', 'step': 5777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:40.880702', 'step': 5777, 'epoch': 1} {'type': 'loss', 'content': 0.1472661942243576, 'timestamp': '2025-09-10 02:40:40.883099', 'step': 5778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:40.940339', 'step': 5778, 'epoch': 1} {'type': 'loss', 'content': 0.20364008843898773, 'timestamp': '2025-09-10 02:40:40.942627', 'step': 5779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:40.998898', 'step': 5779, 'epoch': 1} {'type': 'loss', 'content': 0.20495732128620148, 'timestamp': '2025-09-10 02:40:41.005867', 'step': 5780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:41.061109', 'step': 5780, 'epoch': 1} {'type': 'loss', 'content': 0.1639748513698578, 'timestamp': '2025-09-10 02:40:41.063565', 'step': 5781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:41.122320', 'step': 5781, 'epoch': 1} {'type': 'loss', 'content': 0.10082392394542694, 'timestamp': '2025-09-10 02:40:41.124776', 'step': 5782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:41.181279', 'step': 5782, 'epoch': 1} {'type': 'loss', 'content': 0.19888935983181, 'timestamp': '2025-09-10 02:40:41.183532', 'step': 5783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:41.239381', 'step': 5783, 'epoch': 1} {'type': 'loss', 'content': 0.15816353261470795, 'timestamp': '2025-09-10 02:40:41.246374', 'step': 5784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:41.301448', 'step': 5784, 'epoch': 1} {'type': 'loss', 'content': 0.17124301195144653, 'timestamp': '2025-09-10 02:40:41.303845', 'step': 5785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:41.360341', 'step': 5785, 'epoch': 1} {'type': 'loss', 'content': 0.21918714046478271, 'timestamp': '2025-09-10 02:40:41.362623', 'step': 5786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:41.419335', 'step': 5786, 'epoch': 1} {'type': 'loss', 'content': 0.22737324237823486, 'timestamp': '2025-09-10 02:40:41.421666', 'step': 5787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:41.477735', 'step': 5787, 'epoch': 1} {'type': 'loss', 'content': 0.1058959811925888, 'timestamp': '2025-09-10 02:40:41.484517', 'step': 5788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:41.539947', 'step': 5788, 'epoch': 1} {'type': 'loss', 'content': 0.17667463421821594, 'timestamp': '2025-09-10 02:40:41.542447', 'step': 5789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:41.598636', 'step': 5789, 'epoch': 1} {'type': 'loss', 'content': 0.21067872643470764, 'timestamp': '2025-09-10 02:40:41.601112', 'step': 5790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:41.658472', 'step': 5790, 'epoch': 1} {'type': 'loss', 'content': 0.15701191127300262, 'timestamp': '2025-09-10 02:40:41.660993', 'step': 5791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:41.718595', 'step': 5791, 'epoch': 1} {'type': 'loss', 'content': 0.12357261776924133, 'timestamp': '2025-09-10 02:40:41.725623', 'step': 5792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:41.781595', 'step': 5792, 'epoch': 1} {'type': 'loss', 'content': 0.17899522185325623, 'timestamp': '2025-09-10 02:40:41.783796', 'step': 5793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:41.840501', 'step': 5793, 'epoch': 1} {'type': 'loss', 'content': 0.19222332537174225, 'timestamp': '2025-09-10 02:40:41.842888', 'step': 5794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:41.900385', 'step': 5794, 'epoch': 1} {'type': 'loss', 'content': 0.18024495244026184, 'timestamp': '2025-09-10 02:40:41.902742', 'step': 5795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:41.959052', 'step': 5795, 'epoch': 1} {'type': 'loss', 'content': 0.21734966337680817, 'timestamp': '2025-09-10 02:40:41.966071', 'step': 5796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:42.021408', 'step': 5796, 'epoch': 1} {'type': 'loss', 'content': 0.1720333993434906, 'timestamp': '2025-09-10 02:40:42.023683', 'step': 5797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:42.077435', 'step': 5797, 'epoch': 1} {'type': 'loss', 'content': 0.09052964299917221, 'timestamp': '2025-09-10 02:40:42.079741', 'step': 5798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:42.134239', 'step': 5798, 'epoch': 1} {'type': 'loss', 'content': 0.12486614286899567, 'timestamp': '2025-09-10 02:40:42.136663', 'step': 5799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:42.190305', 'step': 5799, 'epoch': 1} {'type': 'loss', 'content': 0.12358060479164124, 'timestamp': '2025-09-10 02:40:42.196646', 'step': 5800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:42.253629', 'step': 5800, 'epoch': 1} {'type': 'loss', 'content': 0.26412612199783325, 'timestamp': '2025-09-10 02:40:42.255868', 'step': 5801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:42.309390', 'step': 5801, 'epoch': 1} {'type': 'loss', 'content': 0.2376449555158615, 'timestamp': '2025-09-10 02:40:42.311651', 'step': 5802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:42.365411', 'step': 5802, 'epoch': 1} {'type': 'loss', 'content': 0.058208074420690536, 'timestamp': '2025-09-10 02:40:42.369285', 'step': 5803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:42.423998', 'step': 5803, 'epoch': 1} {'type': 'loss', 'content': 0.13680654764175415, 'timestamp': '2025-09-10 02:40:42.430100', 'step': 5804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:42.482460', 'step': 5804, 'epoch': 1} {'type': 'loss', 'content': 0.10727537423372269, 'timestamp': '2025-09-10 02:40:42.484648', 'step': 5805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:42.540304', 'step': 5805, 'epoch': 1} {'type': 'loss', 'content': 0.20960333943367004, 'timestamp': '2025-09-10 02:40:42.542657', 'step': 5806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:42.599088', 'step': 5806, 'epoch': 1} {'type': 'loss', 'content': 0.13936816155910492, 'timestamp': '2025-09-10 02:40:42.601337', 'step': 5807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:42.656690', 'step': 5807, 'epoch': 1} {'type': 'loss', 'content': 0.19996702671051025, 'timestamp': '2025-09-10 02:40:42.663161', 'step': 5808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:42.718937', 'step': 5808, 'epoch': 1} {'type': 'loss', 'content': 0.13917993009090424, 'timestamp': '2025-09-10 02:40:42.721164', 'step': 5809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:42.776140', 'step': 5809, 'epoch': 1} {'type': 'loss', 'content': 0.12239231914281845, 'timestamp': '2025-09-10 02:40:42.778680', 'step': 5810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:42.832961', 'step': 5810, 'epoch': 1} {'type': 'loss', 'content': 0.08310645073652267, 'timestamp': '2025-09-10 02:40:42.835173', 'step': 5811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:42.889244', 'step': 5811, 'epoch': 1} {'type': 'loss', 'content': 0.11116574704647064, 'timestamp': '2025-09-10 02:40:42.895824', 'step': 5812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:42.949960', 'step': 5812, 'epoch': 1} {'type': 'loss', 'content': 0.18266122043132782, 'timestamp': '2025-09-10 02:40:42.952343', 'step': 5813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:43.007204', 'step': 5813, 'epoch': 1} {'type': 'loss', 'content': 0.15099073946475983, 'timestamp': '2025-09-10 02:40:43.009660', 'step': 5814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:43.064396', 'step': 5814, 'epoch': 1} {'type': 'loss', 'content': 0.154771089553833, 'timestamp': '2025-09-10 02:40:43.066790', 'step': 5815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:43.122934', 'step': 5815, 'epoch': 1} {'type': 'loss', 'content': 0.16759918630123138, 'timestamp': '2025-09-10 02:40:43.129104', 'step': 5816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:43.182423', 'step': 5816, 'epoch': 1} {'type': 'loss', 'content': 0.13491959869861603, 'timestamp': '2025-09-10 02:40:43.184583', 'step': 5817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:43.237483', 'step': 5817, 'epoch': 1} {'type': 'loss', 'content': 0.16241426765918732, 'timestamp': '2025-09-10 02:40:43.239857', 'step': 5818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:43.293920', 'step': 5818, 'epoch': 1} {'type': 'loss', 'content': 0.07782173901796341, 'timestamp': '2025-09-10 02:40:43.296199', 'step': 5819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:43.350170', 'step': 5819, 'epoch': 1} {'type': 'loss', 'content': 0.17789626121520996, 'timestamp': '2025-09-10 02:40:43.356594', 'step': 5820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:43.410412', 'step': 5820, 'epoch': 1} {'type': 'loss', 'content': 0.14662933349609375, 'timestamp': '2025-09-10 02:40:43.412834', 'step': 5821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:43.467145', 'step': 5821, 'epoch': 1} {'type': 'loss', 'content': 0.1990969032049179, 'timestamp': '2025-09-10 02:40:43.469408', 'step': 5822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:43.523096', 'step': 5822, 'epoch': 1} {'type': 'loss', 'content': 0.13064412772655487, 'timestamp': '2025-09-10 02:40:43.525224', 'step': 5823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:43.578605', 'step': 5823, 'epoch': 1} {'type': 'loss', 'content': 0.14227981865406036, 'timestamp': '2025-09-10 02:40:43.584649', 'step': 5824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:43.636956', 'step': 5824, 'epoch': 1} {'type': 'loss', 'content': 0.1453225016593933, 'timestamp': '2025-09-10 02:40:43.639514', 'step': 5825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:43.692403', 'step': 5825, 'epoch': 1} {'type': 'loss', 'content': 0.1874299943447113, 'timestamp': '2025-09-10 02:40:43.694578', 'step': 5826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:43.747324', 'step': 5826, 'epoch': 1} {'type': 'loss', 'content': 0.1517285853624344, 'timestamp': '2025-09-10 02:40:43.749877', 'step': 5827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:43.804215', 'step': 5827, 'epoch': 1} {'type': 'loss', 'content': 0.11395437270402908, 'timestamp': '2025-09-10 02:40:43.810619', 'step': 5828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:43.864645', 'step': 5828, 'epoch': 1} {'type': 'loss', 'content': 0.1651192009449005, 'timestamp': '2025-09-10 02:40:43.866933', 'step': 5829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:43.920624', 'step': 5829, 'epoch': 1} {'type': 'loss', 'content': 0.13673105835914612, 'timestamp': '2025-09-10 02:40:43.922841', 'step': 5830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:43.975617', 'step': 5830, 'epoch': 1} {'type': 'loss', 'content': 0.20555178821086884, 'timestamp': '2025-09-10 02:40:43.978007', 'step': 5831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:44.031290', 'step': 5831, 'epoch': 1} {'type': 'loss', 'content': 0.1295488327741623, 'timestamp': '2025-09-10 02:40:44.037381', 'step': 5832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:44.089927', 'step': 5832, 'epoch': 1} {'type': 'loss', 'content': 0.2028728872537613, 'timestamp': '2025-09-10 02:40:44.092071', 'step': 5833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:44.145004', 'step': 5833, 'epoch': 1} {'type': 'loss', 'content': 0.1464933454990387, 'timestamp': '2025-09-10 02:40:44.147457', 'step': 5834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:44.200901', 'step': 5834, 'epoch': 1} {'type': 'loss', 'content': 0.07491686940193176, 'timestamp': '2025-09-10 02:40:44.203305', 'step': 5835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:44.257000', 'step': 5835, 'epoch': 1} {'type': 'loss', 'content': 0.18858638405799866, 'timestamp': '2025-09-10 02:40:44.263048', 'step': 5836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:44.317184', 'step': 5836, 'epoch': 1} {'type': 'loss', 'content': 0.11837365478277206, 'timestamp': '2025-09-10 02:40:44.319538', 'step': 5837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:44.373297', 'step': 5837, 'epoch': 1} {'type': 'loss', 'content': 0.14795446395874023, 'timestamp': '2025-09-10 02:40:44.375912', 'step': 5838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:44.429641', 'step': 5838, 'epoch': 1} {'type': 'loss', 'content': 0.15071898698806763, 'timestamp': '2025-09-10 02:40:44.431796', 'step': 5839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:44.484638', 'step': 5839, 'epoch': 1} {'type': 'loss', 'content': 0.13905572891235352, 'timestamp': '2025-09-10 02:40:44.490863', 'step': 5840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:44.543201', 'step': 5840, 'epoch': 1} {'type': 'loss', 'content': 0.1245129406452179, 'timestamp': '2025-09-10 02:40:44.545490', 'step': 5841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:44.598549', 'step': 5841, 'epoch': 1} {'type': 'loss', 'content': 0.16587993502616882, 'timestamp': '2025-09-10 02:40:44.600824', 'step': 5842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:44.654433', 'step': 5842, 'epoch': 1} {'type': 'loss', 'content': 0.1072050929069519, 'timestamp': '2025-09-10 02:40:44.656805', 'step': 5843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:44.710121', 'step': 5843, 'epoch': 1} {'type': 'loss', 'content': 0.13785433769226074, 'timestamp': '2025-09-10 02:40:44.716127', 'step': 5844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:44.768579', 'step': 5844, 'epoch': 1} {'type': 'loss', 'content': 0.13556425273418427, 'timestamp': '2025-09-10 02:40:44.770773', 'step': 5845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:44.823685', 'step': 5845, 'epoch': 1} {'type': 'loss', 'content': 0.17861062288284302, 'timestamp': '2025-09-10 02:40:44.825830', 'step': 5846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:44.879430', 'step': 5846, 'epoch': 1} {'type': 'loss', 'content': 0.25086402893066406, 'timestamp': '2025-09-10 02:40:44.881798', 'step': 5847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:44.935259', 'step': 5847, 'epoch': 1} {'type': 'loss', 'content': 0.19141733646392822, 'timestamp': '2025-09-10 02:40:44.941362', 'step': 5848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:44.994578', 'step': 5848, 'epoch': 1} {'type': 'loss', 'content': 0.17356036603450775, 'timestamp': '2025-09-10 02:40:44.996762', 'step': 5849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:45.049972', 'step': 5849, 'epoch': 1} {'type': 'loss', 'content': 0.23226164281368256, 'timestamp': '2025-09-10 02:40:45.052149', 'step': 5850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:45.105289', 'step': 5850, 'epoch': 1} {'type': 'loss', 'content': 0.20256830751895905, 'timestamp': '2025-09-10 02:40:45.107824', 'step': 5851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:45.161175', 'step': 5851, 'epoch': 1} {'type': 'loss', 'content': 0.13299646973609924, 'timestamp': '2025-09-10 02:40:45.167264', 'step': 5852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:45.221271', 'step': 5852, 'epoch': 1} {'type': 'loss', 'content': 0.18581831455230713, 'timestamp': '2025-09-10 02:40:45.223727', 'step': 5853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:45.276605', 'step': 5853, 'epoch': 1} {'type': 'loss', 'content': 0.12788943946361542, 'timestamp': '2025-09-10 02:40:45.278968', 'step': 5854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:45.336231', 'step': 5854, 'epoch': 1} {'type': 'loss', 'content': 0.20071539282798767, 'timestamp': '2025-09-10 02:40:45.338653', 'step': 5855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:45.392483', 'step': 5855, 'epoch': 1} {'type': 'loss', 'content': 0.17044471204280853, 'timestamp': '2025-09-10 02:40:45.398736', 'step': 5856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:45.451456', 'step': 5856, 'epoch': 1} {'type': 'loss', 'content': 0.24994543194770813, 'timestamp': '2025-09-10 02:40:45.453984', 'step': 5857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:45.507335', 'step': 5857, 'epoch': 1} {'type': 'loss', 'content': 0.1529436856508255, 'timestamp': '2025-09-10 02:40:45.509747', 'step': 5858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:45.563483', 'step': 5858, 'epoch': 1} {'type': 'loss', 'content': 0.18858295679092407, 'timestamp': '2025-09-10 02:40:45.565909', 'step': 5859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:45.619453', 'step': 5859, 'epoch': 1} {'type': 'loss', 'content': 0.24516648054122925, 'timestamp': '2025-09-10 02:40:45.625968', 'step': 5860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:45.678736', 'step': 5860, 'epoch': 1} {'type': 'loss', 'content': 0.15026313066482544, 'timestamp': '2025-09-10 02:40:45.681049', 'step': 5861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:45.734293', 'step': 5861, 'epoch': 1} {'type': 'loss', 'content': 0.1865909844636917, 'timestamp': '2025-09-10 02:40:45.736669', 'step': 5862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:45.790577', 'step': 5862, 'epoch': 1} {'type': 'loss', 'content': 0.15288090705871582, 'timestamp': '2025-09-10 02:40:45.792921', 'step': 5863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:45.846287', 'step': 5863, 'epoch': 1} {'type': 'loss', 'content': 0.10467863082885742, 'timestamp': '2025-09-10 02:40:45.852377', 'step': 5864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:45.905521', 'step': 5864, 'epoch': 1} {'type': 'loss', 'content': 0.13632407784461975, 'timestamp': '2025-09-10 02:40:45.907909', 'step': 5865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:45.961645', 'step': 5865, 'epoch': 1} {'type': 'loss', 'content': 0.1861594319343567, 'timestamp': '2025-09-10 02:40:45.964115', 'step': 5866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:46.017438', 'step': 5866, 'epoch': 1} {'type': 'loss', 'content': 0.18916961550712585, 'timestamp': '2025-09-10 02:40:46.019820', 'step': 5867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:46.073895', 'step': 5867, 'epoch': 1} {'type': 'loss', 'content': 0.14841987192630768, 'timestamp': '2025-09-10 02:40:46.079757', 'step': 5868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:46.146612', 'step': 5868, 'epoch': 1} {'type': 'loss', 'content': 0.1418883055448532, 'timestamp': '2025-09-10 02:40:46.148891', 'step': 5869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:46.202217', 'step': 5869, 'epoch': 1} {'type': 'loss', 'content': 0.18390682339668274, 'timestamp': '2025-09-10 02:40:46.204556', 'step': 5870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:46.258476', 'step': 5870, 'epoch': 1} {'type': 'loss', 'content': 0.1351873129606247, 'timestamp': '2025-09-10 02:40:46.260911', 'step': 5871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:46.314596', 'step': 5871, 'epoch': 1} {'type': 'loss', 'content': 0.09054096043109894, 'timestamp': '2025-09-10 02:40:46.320657', 'step': 5872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:46.373068', 'step': 5872, 'epoch': 1} {'type': 'loss', 'content': 0.16333827376365662, 'timestamp': '2025-09-10 02:40:46.375437', 'step': 5873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:46.428428', 'step': 5873, 'epoch': 1} {'type': 'loss', 'content': 0.08757752180099487, 'timestamp': '2025-09-10 02:40:46.430770', 'step': 5874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:46.483747', 'step': 5874, 'epoch': 1} {'type': 'loss', 'content': 0.16116957366466522, 'timestamp': '2025-09-10 02:40:46.486357', 'step': 5875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:46.539874', 'step': 5875, 'epoch': 1} {'type': 'loss', 'content': 0.1583082228899002, 'timestamp': '2025-09-10 02:40:46.546019', 'step': 5876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:46.598456', 'step': 5876, 'epoch': 1} {'type': 'loss', 'content': 0.10434241592884064, 'timestamp': '2025-09-10 02:40:46.600673', 'step': 5877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:46.653934', 'step': 5877, 'epoch': 1} {'type': 'loss', 'content': 0.09524209052324295, 'timestamp': '2025-09-10 02:40:46.656201', 'step': 5878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:46.709329', 'step': 5878, 'epoch': 1} {'type': 'loss', 'content': 0.17571286857128143, 'timestamp': '2025-09-10 02:40:46.711732', 'step': 5879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:46.765295', 'step': 5879, 'epoch': 1} {'type': 'loss', 'content': 0.16990050673484802, 'timestamp': '2025-09-10 02:40:46.771115', 'step': 5880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:46.823880', 'step': 5880, 'epoch': 1} {'type': 'loss', 'content': 0.21266020834445953, 'timestamp': '2025-09-10 02:40:46.826357', 'step': 5881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:46.879201', 'step': 5881, 'epoch': 1} {'type': 'loss', 'content': 0.17607761919498444, 'timestamp': '2025-09-10 02:40:46.881589', 'step': 5882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:46.934955', 'step': 5882, 'epoch': 1} {'type': 'loss', 'content': 0.11765056848526001, 'timestamp': '2025-09-10 02:40:46.937433', 'step': 5883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:46.990698', 'step': 5883, 'epoch': 1} {'type': 'loss', 'content': 0.21058717370033264, 'timestamp': '2025-09-10 02:40:46.996610', 'step': 5884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:47.049424', 'step': 5884, 'epoch': 1} {'type': 'loss', 'content': 0.048353906720876694, 'timestamp': '2025-09-10 02:40:47.051841', 'step': 5885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:47.105867', 'step': 5885, 'epoch': 1} {'type': 'loss', 'content': 0.1724618375301361, 'timestamp': '2025-09-10 02:40:47.108294', 'step': 5886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:47.162491', 'step': 5886, 'epoch': 1} {'type': 'loss', 'content': 0.11850791424512863, 'timestamp': '2025-09-10 02:40:47.165065', 'step': 5887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:47.219575', 'step': 5887, 'epoch': 1} {'type': 'loss', 'content': 0.16829383373260498, 'timestamp': '2025-09-10 02:40:47.225652', 'step': 5888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:47.278611', 'step': 5888, 'epoch': 1} {'type': 'loss', 'content': 0.2883816659450531, 'timestamp': '2025-09-10 02:40:47.280967', 'step': 5889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:47.335409', 'step': 5889, 'epoch': 1} {'type': 'loss', 'content': 0.1319553107023239, 'timestamp': '2025-09-10 02:40:47.337866', 'step': 5890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:47.391299', 'step': 5890, 'epoch': 1} {'type': 'loss', 'content': 0.18304723501205444, 'timestamp': '2025-09-10 02:40:47.393782', 'step': 5891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:47.447703', 'step': 5891, 'epoch': 1} {'type': 'loss', 'content': 0.11106696724891663, 'timestamp': '2025-09-10 02:40:47.454168', 'step': 5892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:47.507642', 'step': 5892, 'epoch': 1} {'type': 'loss', 'content': 0.1061641126871109, 'timestamp': '2025-09-10 02:40:47.510160', 'step': 5893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:47.564097', 'step': 5893, 'epoch': 1} {'type': 'loss', 'content': 0.1411406248807907, 'timestamp': '2025-09-10 02:40:47.566578', 'step': 5894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:47.621220', 'step': 5894, 'epoch': 1} {'type': 'loss', 'content': 0.23143306374549866, 'timestamp': '2025-09-10 02:40:47.623601', 'step': 5895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:47.677209', 'step': 5895, 'epoch': 1} {'type': 'loss', 'content': 0.17264382541179657, 'timestamp': '2025-09-10 02:40:47.683526', 'step': 5896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:47.736645', 'step': 5896, 'epoch': 1} {'type': 'loss', 'content': 0.20329636335372925, 'timestamp': '2025-09-10 02:40:47.739241', 'step': 5897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:47.796009', 'step': 5897, 'epoch': 1} {'type': 'loss', 'content': 0.14114217460155487, 'timestamp': '2025-09-10 02:40:47.798710', 'step': 5898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:47.852592', 'step': 5898, 'epoch': 1} {'type': 'loss', 'content': 0.08900117874145508, 'timestamp': '2025-09-10 02:40:47.855082', 'step': 5899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:47.910081', 'step': 5899, 'epoch': 1} {'type': 'loss', 'content': 0.2129576951265335, 'timestamp': '2025-09-10 02:40:47.916472', 'step': 5900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:47.969691', 'step': 5900, 'epoch': 1} {'type': 'loss', 'content': 0.14046570658683777, 'timestamp': '2025-09-10 02:40:47.972366', 'step': 5901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:48.025556', 'step': 5901, 'epoch': 1} {'type': 'loss', 'content': 0.16273877024650574, 'timestamp': '2025-09-10 02:40:48.027960', 'step': 5902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:48.082092', 'step': 5902, 'epoch': 1} {'type': 'loss', 'content': 0.101884625852108, 'timestamp': '2025-09-10 02:40:48.084536', 'step': 5903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:48.141790', 'step': 5903, 'epoch': 1} {'type': 'loss', 'content': 0.29143762588500977, 'timestamp': '2025-09-10 02:40:48.148140', 'step': 5904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:48.202085', 'step': 5904, 'epoch': 1} {'type': 'loss', 'content': 0.1814015656709671, 'timestamp': '2025-09-10 02:40:48.204521', 'step': 5905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:48.257871', 'step': 5905, 'epoch': 1} {'type': 'loss', 'content': 0.18957044184207916, 'timestamp': '2025-09-10 02:40:48.260278', 'step': 5906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:48.314802', 'step': 5906, 'epoch': 1} {'type': 'loss', 'content': 0.18978483974933624, 'timestamp': '2025-09-10 02:40:48.317158', 'step': 5907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:48.370952', 'step': 5907, 'epoch': 1} {'type': 'loss', 'content': 0.15087220072746277, 'timestamp': '2025-09-10 02:40:48.377215', 'step': 5908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:48.439131', 'step': 5908, 'epoch': 1} {'type': 'loss', 'content': 0.13454784452915192, 'timestamp': '2025-09-10 02:40:48.441519', 'step': 5909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:48.494927', 'step': 5909, 'epoch': 1} {'type': 'loss', 'content': 0.18843436241149902, 'timestamp': '2025-09-10 02:40:48.497292', 'step': 5910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:48.550974', 'step': 5910, 'epoch': 1} {'type': 'loss', 'content': 0.14738917350769043, 'timestamp': '2025-09-10 02:40:48.553567', 'step': 5911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:48.606782', 'step': 5911, 'epoch': 1} {'type': 'loss', 'content': 0.13286970555782318, 'timestamp': '2025-09-10 02:40:48.612641', 'step': 5912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:48.666078', 'step': 5912, 'epoch': 1} {'type': 'loss', 'content': 0.13523824512958527, 'timestamp': '2025-09-10 02:40:48.668410', 'step': 5913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:48.721875', 'step': 5913, 'epoch': 1} {'type': 'loss', 'content': 0.2914929986000061, 'timestamp': '2025-09-10 02:40:48.724511', 'step': 5914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:48.779462', 'step': 5914, 'epoch': 1} {'type': 'loss', 'content': 0.24617551267147064, 'timestamp': '2025-09-10 02:40:48.781996', 'step': 5915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:48.837159', 'step': 5915, 'epoch': 1} {'type': 'loss', 'content': 0.1401277482509613, 'timestamp': '2025-09-10 02:40:48.843708', 'step': 5916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:48.896786', 'step': 5916, 'epoch': 1} {'type': 'loss', 'content': 0.1937756985425949, 'timestamp': '2025-09-10 02:40:48.899080', 'step': 5917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:48.953242', 'step': 5917, 'epoch': 1} {'type': 'loss', 'content': 0.12211794406175613, 'timestamp': '2025-09-10 02:40:48.955638', 'step': 5918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:49.009444', 'step': 5918, 'epoch': 1} {'type': 'loss', 'content': 0.20012453198432922, 'timestamp': '2025-09-10 02:40:49.018210', 'step': 5919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:49.071883', 'step': 5919, 'epoch': 1} {'type': 'loss', 'content': 0.1921340674161911, 'timestamp': '2025-09-10 02:40:49.078367', 'step': 5920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:49.134435', 'step': 5920, 'epoch': 1} {'type': 'loss', 'content': 0.1346679925918579, 'timestamp': '2025-09-10 02:40:49.136748', 'step': 5921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:49.191241', 'step': 5921, 'epoch': 1} {'type': 'loss', 'content': 0.22179196774959564, 'timestamp': '2025-09-10 02:40:49.193729', 'step': 5922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:49.249003', 'step': 5922, 'epoch': 1} {'type': 'loss', 'content': 0.24802978336811066, 'timestamp': '2025-09-10 02:40:49.251356', 'step': 5923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:49.309868', 'step': 5923, 'epoch': 1} {'type': 'loss', 'content': 0.1545339971780777, 'timestamp': '2025-09-10 02:40:49.316226', 'step': 5924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:49.376232', 'step': 5924, 'epoch': 1} {'type': 'loss', 'content': 0.09039879590272903, 'timestamp': '2025-09-10 02:40:49.380449', 'step': 5925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:49.435712', 'step': 5925, 'epoch': 1} {'type': 'loss', 'content': 0.14392603933811188, 'timestamp': '2025-09-10 02:40:49.438036', 'step': 5926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:49.491566', 'step': 5926, 'epoch': 1} {'type': 'loss', 'content': 0.12343666702508926, 'timestamp': '2025-09-10 02:40:49.493981', 'step': 5927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:49.549117', 'step': 5927, 'epoch': 1} {'type': 'loss', 'content': 0.0760350376367569, 'timestamp': '2025-09-10 02:40:49.558123', 'step': 5928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:49.629572', 'step': 5928, 'epoch': 1} {'type': 'loss', 'content': 0.10013464838266373, 'timestamp': '2025-09-10 02:40:49.632385', 'step': 5929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:49.687754', 'step': 5929, 'epoch': 1} {'type': 'loss', 'content': 0.07583625614643097, 'timestamp': '2025-09-10 02:40:49.690414', 'step': 5930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:49.744481', 'step': 5930, 'epoch': 1} {'type': 'loss', 'content': 0.17348021268844604, 'timestamp': '2025-09-10 02:40:49.746541', 'step': 5931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:49.803185', 'step': 5931, 'epoch': 1} {'type': 'loss', 'content': 0.17189259827136993, 'timestamp': '2025-09-10 02:40:49.809200', 'step': 5932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:49.862999', 'step': 5932, 'epoch': 1} {'type': 'loss', 'content': 0.21462835371494293, 'timestamp': '2025-09-10 02:40:49.866904', 'step': 5933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:49.923974', 'step': 5933, 'epoch': 1} {'type': 'loss', 'content': 0.12029334902763367, 'timestamp': '2025-09-10 02:40:49.926399', 'step': 5934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:49.985115', 'step': 5934, 'epoch': 1} {'type': 'loss', 'content': 0.18725138902664185, 'timestamp': '2025-09-10 02:40:49.987552', 'step': 5935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:50.042237', 'step': 5935, 'epoch': 1} {'type': 'loss', 'content': 0.12952202558517456, 'timestamp': '2025-09-10 02:40:50.048690', 'step': 5936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:50.105628', 'step': 5936, 'epoch': 1} {'type': 'loss', 'content': 0.12792618572711945, 'timestamp': '2025-09-10 02:40:50.108023', 'step': 5937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:50.164088', 'step': 5937, 'epoch': 1} {'type': 'loss', 'content': 0.18035204708576202, 'timestamp': '2025-09-10 02:40:50.167034', 'step': 5938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:50.230180', 'step': 5938, 'epoch': 1} {'type': 'loss', 'content': 0.15719513595104218, 'timestamp': '2025-09-10 02:40:50.232564', 'step': 5939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:50.287312', 'step': 5939, 'epoch': 1} {'type': 'loss', 'content': 0.34614837169647217, 'timestamp': '2025-09-10 02:40:50.293703', 'step': 5940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:50.346883', 'step': 5940, 'epoch': 1} {'type': 'loss', 'content': 0.19110877811908722, 'timestamp': '2025-09-10 02:40:50.349309', 'step': 5941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:50.402430', 'step': 5941, 'epoch': 1} {'type': 'loss', 'content': 0.14557024836540222, 'timestamp': '2025-09-10 02:40:50.410115', 'step': 5942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:50.468464', 'step': 5942, 'epoch': 1} {'type': 'loss', 'content': 0.09259094297885895, 'timestamp': '2025-09-10 02:40:50.472458', 'step': 5943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:50.529413', 'step': 5943, 'epoch': 1} {'type': 'loss', 'content': 0.1667812317609787, 'timestamp': '2025-09-10 02:40:50.535951', 'step': 5944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:50.589115', 'step': 5944, 'epoch': 1} {'type': 'loss', 'content': 0.23253163695335388, 'timestamp': '2025-09-10 02:40:50.591466', 'step': 5945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:50.645782', 'step': 5945, 'epoch': 1} {'type': 'loss', 'content': 0.12594270706176758, 'timestamp': '2025-09-10 02:40:50.648160', 'step': 5946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:50.702427', 'step': 5946, 'epoch': 1} {'type': 'loss', 'content': 0.15489837527275085, 'timestamp': '2025-09-10 02:40:50.704928', 'step': 5947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:50.761096', 'step': 5947, 'epoch': 1} {'type': 'loss', 'content': 0.2119162529706955, 'timestamp': '2025-09-10 02:40:50.767522', 'step': 5948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:50.821393', 'step': 5948, 'epoch': 1} {'type': 'loss', 'content': 0.21130189299583435, 'timestamp': '2025-09-10 02:40:50.823762', 'step': 5949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:50.880168', 'step': 5949, 'epoch': 1} {'type': 'loss', 'content': 0.18650932610034943, 'timestamp': '2025-09-10 02:40:50.882403', 'step': 5950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:50.936467', 'step': 5950, 'epoch': 1} {'type': 'loss', 'content': 0.16162090003490448, 'timestamp': '2025-09-10 02:40:50.938553', 'step': 5951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:50.994290', 'step': 5951, 'epoch': 1} {'type': 'loss', 'content': 0.16513501107692719, 'timestamp': '2025-09-10 02:40:51.000342', 'step': 5952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:51.056126', 'step': 5952, 'epoch': 1} {'type': 'loss', 'content': 0.16102416813373566, 'timestamp': '2025-09-10 02:40:51.058600', 'step': 5953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:51.113454', 'step': 5953, 'epoch': 1} {'type': 'loss', 'content': 0.12151073664426804, 'timestamp': '2025-09-10 02:40:51.115918', 'step': 5954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:51.170725', 'step': 5954, 'epoch': 1} {'type': 'loss', 'content': 0.1405411660671234, 'timestamp': '2025-09-10 02:40:51.173182', 'step': 5955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:51.228749', 'step': 5955, 'epoch': 1} {'type': 'loss', 'content': 0.12637397646903992, 'timestamp': '2025-09-10 02:40:51.235302', 'step': 5956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:51.289312', 'step': 5956, 'epoch': 1} {'type': 'loss', 'content': 0.13705959916114807, 'timestamp': '2025-09-10 02:40:51.291867', 'step': 5957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:51.346426', 'step': 5957, 'epoch': 1} {'type': 'loss', 'content': 0.17227616906166077, 'timestamp': '2025-09-10 02:40:51.348971', 'step': 5958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:51.403319', 'step': 5958, 'epoch': 1} {'type': 'loss', 'content': 0.20555023849010468, 'timestamp': '2025-09-10 02:40:51.405857', 'step': 5959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:51.460288', 'step': 5959, 'epoch': 1} {'type': 'loss', 'content': 0.08160223066806793, 'timestamp': '2025-09-10 02:40:51.466615', 'step': 5960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:51.519796', 'step': 5960, 'epoch': 1} {'type': 'loss', 'content': 0.24502165615558624, 'timestamp': '2025-09-10 02:40:51.522175', 'step': 5961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:51.575772', 'step': 5961, 'epoch': 1} {'type': 'loss', 'content': 0.13784989714622498, 'timestamp': '2025-09-10 02:40:51.578140', 'step': 5962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:51.631557', 'step': 5962, 'epoch': 1} {'type': 'loss', 'content': 0.10685476660728455, 'timestamp': '2025-09-10 02:40:51.633902', 'step': 5963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:51.688085', 'step': 5963, 'epoch': 1} {'type': 'loss', 'content': 0.08804524689912796, 'timestamp': '2025-09-10 02:40:51.694347', 'step': 5964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:51.747649', 'step': 5964, 'epoch': 1} {'type': 'loss', 'content': 0.1286383420228958, 'timestamp': '2025-09-10 02:40:51.750149', 'step': 5965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:51.804323', 'step': 5965, 'epoch': 1} {'type': 'loss', 'content': 0.13953275978565216, 'timestamp': '2025-09-10 02:40:51.806666', 'step': 5966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:51.860533', 'step': 5966, 'epoch': 1} {'type': 'loss', 'content': 0.1177014634013176, 'timestamp': '2025-09-10 02:40:51.862891', 'step': 5967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:51.919011', 'step': 5967, 'epoch': 1} {'type': 'loss', 'content': 0.16470162570476532, 'timestamp': '2025-09-10 02:40:51.925721', 'step': 5968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:51.981017', 'step': 5968, 'epoch': 1} {'type': 'loss', 'content': 0.10781785100698471, 'timestamp': '2025-09-10 02:40:51.983406', 'step': 5969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:52.037658', 'step': 5969, 'epoch': 1} {'type': 'loss', 'content': 0.17478032410144806, 'timestamp': '2025-09-10 02:40:52.039999', 'step': 5970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:52.093954', 'step': 5970, 'epoch': 1} {'type': 'loss', 'content': 0.12689462304115295, 'timestamp': '2025-09-10 02:40:52.096323', 'step': 5971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:52.152349', 'step': 5971, 'epoch': 1} {'type': 'loss', 'content': 0.14310769736766815, 'timestamp': '2025-09-10 02:40:52.158687', 'step': 5972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:52.211466', 'step': 5972, 'epoch': 1} {'type': 'loss', 'content': 0.15352275967597961, 'timestamp': '2025-09-10 02:40:52.214066', 'step': 5973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:52.268294', 'step': 5973, 'epoch': 1} {'type': 'loss', 'content': 0.13581185042858124, 'timestamp': '2025-09-10 02:40:52.270603', 'step': 5974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:52.325100', 'step': 5974, 'epoch': 1} {'type': 'loss', 'content': 0.2180991917848587, 'timestamp': '2025-09-10 02:40:52.327542', 'step': 5975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:52.381242', 'step': 5975, 'epoch': 1} {'type': 'loss', 'content': 0.1445082426071167, 'timestamp': '2025-09-10 02:40:52.387582', 'step': 5976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:52.441070', 'step': 5976, 'epoch': 1} {'type': 'loss', 'content': 0.14664040505886078, 'timestamp': '2025-09-10 02:40:52.443398', 'step': 5977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:52.497820', 'step': 5977, 'epoch': 1} {'type': 'loss', 'content': 0.12536196410655975, 'timestamp': '2025-09-10 02:40:52.500094', 'step': 5978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:40:52.554614', 'step': 5978, 'epoch': 1} {'type': 'loss', 'content': 0.24771733582019806, 'timestamp': '2025-09-10 02:40:52.556984', 'step': 5979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:52.611779', 'step': 5979, 'epoch': 1} {'type': 'loss', 'content': 0.15135538578033447, 'timestamp': '2025-09-10 02:40:52.618162', 'step': 5980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:52.671718', 'step': 5980, 'epoch': 1} {'type': 'loss', 'content': 0.1397600769996643, 'timestamp': '2025-09-10 02:40:52.674094', 'step': 5981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:52.728301', 'step': 5981, 'epoch': 1} {'type': 'loss', 'content': 0.17641715705394745, 'timestamp': '2025-09-10 02:40:52.730680', 'step': 5982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:52.785031', 'step': 5982, 'epoch': 1} {'type': 'loss', 'content': 0.1818213015794754, 'timestamp': '2025-09-10 02:40:52.787578', 'step': 5983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:52.841264', 'step': 5983, 'epoch': 1} {'type': 'loss', 'content': 0.2727077901363373, 'timestamp': '2025-09-10 02:40:52.847653', 'step': 5984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:52.901380', 'step': 5984, 'epoch': 1} {'type': 'loss', 'content': 0.15403197705745697, 'timestamp': '2025-09-10 02:40:52.903880', 'step': 5985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:52.959441', 'step': 5985, 'epoch': 1} {'type': 'loss', 'content': 0.09932535141706467, 'timestamp': '2025-09-10 02:40:52.961950', 'step': 5986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:53.015846', 'step': 5986, 'epoch': 1} {'type': 'loss', 'content': 0.1301058828830719, 'timestamp': '2025-09-10 02:40:53.018480', 'step': 5987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:53.073978', 'step': 5987, 'epoch': 1} {'type': 'loss', 'content': 0.14847798645496368, 'timestamp': '2025-09-10 02:40:53.081409', 'step': 5988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:53.134246', 'step': 5988, 'epoch': 1} {'type': 'loss', 'content': 0.11549454927444458, 'timestamp': '2025-09-10 02:40:53.136704', 'step': 5989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:53.191459', 'step': 5989, 'epoch': 1} {'type': 'loss', 'content': 0.2499672770500183, 'timestamp': '2025-09-10 02:40:53.193887', 'step': 5990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:53.248174', 'step': 5990, 'epoch': 1} {'type': 'loss', 'content': 0.11273940652608871, 'timestamp': '2025-09-10 02:40:53.250581', 'step': 5991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:53.304784', 'step': 5991, 'epoch': 1} {'type': 'loss', 'content': 0.09128550440073013, 'timestamp': '2025-09-10 02:40:53.311511', 'step': 5992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:53.367185', 'step': 5992, 'epoch': 1} {'type': 'loss', 'content': 0.23394881188869476, 'timestamp': '2025-09-10 02:40:53.369665', 'step': 5993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:53.425381', 'step': 5993, 'epoch': 1} {'type': 'loss', 'content': 0.17323948442935944, 'timestamp': '2025-09-10 02:40:53.427852', 'step': 5994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:53.483833', 'step': 5994, 'epoch': 1} {'type': 'loss', 'content': 0.25465163588523865, 'timestamp': '2025-09-10 02:40:53.486288', 'step': 5995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:53.541021', 'step': 5995, 'epoch': 1} {'type': 'loss', 'content': 0.1593906581401825, 'timestamp': '2025-09-10 02:40:53.547890', 'step': 5996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:53.603680', 'step': 5996, 'epoch': 1} {'type': 'loss', 'content': 0.16507935523986816, 'timestamp': '2025-09-10 02:40:53.606022', 'step': 5997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:53.660131', 'step': 5997, 'epoch': 1} {'type': 'loss', 'content': 0.15877258777618408, 'timestamp': '2025-09-10 02:40:53.662505', 'step': 5998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:53.716439', 'step': 5998, 'epoch': 1} {'type': 'loss', 'content': 0.20141446590423584, 'timestamp': '2025-09-10 02:40:53.720445', 'step': 5999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:53.774513', 'step': 5999, 'epoch': 1} {'type': 'loss', 'content': 0.20436592400074005, 'timestamp': '2025-09-10 02:40:53.780688', 'step': 6000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 6000', 'timestamp': '2025-09-10 02:40:54.133911', 'step': 6000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:54.192380', 'step': 6000, 'epoch': 1} {'type': 'loss', 'content': 0.14951026439666748, 'timestamp': '2025-09-10 02:40:54.194692', 'step': 6001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:54.249323', 'step': 6001, 'epoch': 1} {'type': 'loss', 'content': 0.17136655747890472, 'timestamp': '2025-09-10 02:40:54.251586', 'step': 6002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:54.305017', 'step': 6002, 'epoch': 1} {'type': 'loss', 'content': 0.18096238374710083, 'timestamp': '2025-09-10 02:40:54.307439', 'step': 6003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:54.360500', 'step': 6003, 'epoch': 1} {'type': 'loss', 'content': 0.11983674019575119, 'timestamp': '2025-09-10 02:40:54.366752', 'step': 6004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:54.419692', 'step': 6004, 'epoch': 1} {'type': 'loss', 'content': 0.14398527145385742, 'timestamp': '2025-09-10 02:40:54.421833', 'step': 6005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:54.474771', 'step': 6005, 'epoch': 1} {'type': 'loss', 'content': 0.09887625277042389, 'timestamp': '2025-09-10 02:40:54.477721', 'step': 6006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:54.532048', 'step': 6006, 'epoch': 1} {'type': 'loss', 'content': 0.20745724439620972, 'timestamp': '2025-09-10 02:40:54.534375', 'step': 6007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:54.590852', 'step': 6007, 'epoch': 1} {'type': 'loss', 'content': 0.08912021666765213, 'timestamp': '2025-09-10 02:40:54.596971', 'step': 6008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:54.649850', 'step': 6008, 'epoch': 1} {'type': 'loss', 'content': 0.12736546993255615, 'timestamp': '2025-09-10 02:40:54.652166', 'step': 6009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:54.707710', 'step': 6009, 'epoch': 1} {'type': 'loss', 'content': 0.14381514489650726, 'timestamp': '2025-09-10 02:40:54.710211', 'step': 6010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:54.765911', 'step': 6010, 'epoch': 1} {'type': 'loss', 'content': 0.19857843220233917, 'timestamp': '2025-09-10 02:40:54.768008', 'step': 6011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:54.822315', 'step': 6011, 'epoch': 1} {'type': 'loss', 'content': 0.14704978466033936, 'timestamp': '2025-09-10 02:40:54.828224', 'step': 6012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:54.880700', 'step': 6012, 'epoch': 1} {'type': 'loss', 'content': 0.19802649319171906, 'timestamp': '2025-09-10 02:40:54.883059', 'step': 6013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:54.936208', 'step': 6013, 'epoch': 1} {'type': 'loss', 'content': 0.19894984364509583, 'timestamp': '2025-09-10 02:40:54.938377', 'step': 6014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:54.992194', 'step': 6014, 'epoch': 1} {'type': 'loss', 'content': 0.1201908215880394, 'timestamp': '2025-09-10 02:40:54.994704', 'step': 6015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:55.048989', 'step': 6015, 'epoch': 1} {'type': 'loss', 'content': 0.1758309304714203, 'timestamp': '2025-09-10 02:40:55.055520', 'step': 6016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:55.108675', 'step': 6016, 'epoch': 1} {'type': 'loss', 'content': 0.16218528151512146, 'timestamp': '2025-09-10 02:40:55.110901', 'step': 6017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:55.164182', 'step': 6017, 'epoch': 1} {'type': 'loss', 'content': 0.10573847591876984, 'timestamp': '2025-09-10 02:40:55.166352', 'step': 6018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:55.219586', 'step': 6018, 'epoch': 1} {'type': 'loss', 'content': 0.1104041188955307, 'timestamp': '2025-09-10 02:40:55.222053', 'step': 6019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:55.276651', 'step': 6019, 'epoch': 1} {'type': 'loss', 'content': 0.22362157702445984, 'timestamp': '2025-09-10 02:40:55.283320', 'step': 6020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:55.338365', 'step': 6020, 'epoch': 1} {'type': 'loss', 'content': 0.18546275794506073, 'timestamp': '2025-09-10 02:40:55.340896', 'step': 6021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:55.395464', 'step': 6021, 'epoch': 1} {'type': 'loss', 'content': 0.11071349680423737, 'timestamp': '2025-09-10 02:40:55.397638', 'step': 6022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:55.452846', 'step': 6022, 'epoch': 1} {'type': 'loss', 'content': 0.11543197929859161, 'timestamp': '2025-09-10 02:40:55.455185', 'step': 6023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:55.509395', 'step': 6023, 'epoch': 1} {'type': 'loss', 'content': 0.16205671429634094, 'timestamp': '2025-09-10 02:40:55.515520', 'step': 6024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:55.568884', 'step': 6024, 'epoch': 1} {'type': 'loss', 'content': 0.13786287605762482, 'timestamp': '2025-09-10 02:40:55.571005', 'step': 6025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:55.623581', 'step': 6025, 'epoch': 1} {'type': 'loss', 'content': 0.14714036881923676, 'timestamp': '2025-09-10 02:40:55.625662', 'step': 6026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:55.679135', 'step': 6026, 'epoch': 1} {'type': 'loss', 'content': 0.10470384359359741, 'timestamp': '2025-09-10 02:40:55.681301', 'step': 6027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:55.733858', 'step': 6027, 'epoch': 1} {'type': 'loss', 'content': 0.15745247900485992, 'timestamp': '2025-09-10 02:40:55.740047', 'step': 6028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:55.797057', 'step': 6028, 'epoch': 1} {'type': 'loss', 'content': 0.17488935589790344, 'timestamp': '2025-09-10 02:40:55.799524', 'step': 6029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:55.853240', 'step': 6029, 'epoch': 1} {'type': 'loss', 'content': 0.13011835515499115, 'timestamp': '2025-09-10 02:40:55.855734', 'step': 6030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:55.911572', 'step': 6030, 'epoch': 1} {'type': 'loss', 'content': 0.07231823354959488, 'timestamp': '2025-09-10 02:40:55.914021', 'step': 6031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:55.967527', 'step': 6031, 'epoch': 1} {'type': 'loss', 'content': 0.15965989232063293, 'timestamp': '2025-09-10 02:40:55.973780', 'step': 6032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:56.026618', 'step': 6032, 'epoch': 1} {'type': 'loss', 'content': 0.18352192640304565, 'timestamp': '2025-09-10 02:40:56.028979', 'step': 6033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:56.082844', 'step': 6033, 'epoch': 1} {'type': 'loss', 'content': 0.17855355143547058, 'timestamp': '2025-09-10 02:40:56.085189', 'step': 6034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:56.138758', 'step': 6034, 'epoch': 1} {'type': 'loss', 'content': 0.21091070771217346, 'timestamp': '2025-09-10 02:40:56.141189', 'step': 6035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:56.195882', 'step': 6035, 'epoch': 1} {'type': 'loss', 'content': 0.23853528499603271, 'timestamp': '2025-09-10 02:40:56.202112', 'step': 6036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:56.255637', 'step': 6036, 'epoch': 1} {'type': 'loss', 'content': 0.1836438626050949, 'timestamp': '2025-09-10 02:40:56.258070', 'step': 6037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:56.313200', 'step': 6037, 'epoch': 1} {'type': 'loss', 'content': 0.14535954594612122, 'timestamp': '2025-09-10 02:40:56.315581', 'step': 6038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:56.370001', 'step': 6038, 'epoch': 1} {'type': 'loss', 'content': 0.18408095836639404, 'timestamp': '2025-09-10 02:40:56.372417', 'step': 6039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:56.426244', 'step': 6039, 'epoch': 1} {'type': 'loss', 'content': 0.10533750057220459, 'timestamp': '2025-09-10 02:40:56.432633', 'step': 6040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:56.486158', 'step': 6040, 'epoch': 1} {'type': 'loss', 'content': 0.1648288071155548, 'timestamp': '2025-09-10 02:40:56.488528', 'step': 6041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:56.542198', 'step': 6041, 'epoch': 1} {'type': 'loss', 'content': 0.1508561223745346, 'timestamp': '2025-09-10 02:40:56.544730', 'step': 6042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:56.598485', 'step': 6042, 'epoch': 1} {'type': 'loss', 'content': 0.2018168568611145, 'timestamp': '2025-09-10 02:40:56.600850', 'step': 6043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:56.655619', 'step': 6043, 'epoch': 1} {'type': 'loss', 'content': 0.12784349918365479, 'timestamp': '2025-09-10 02:40:56.661492', 'step': 6044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:56.714509', 'step': 6044, 'epoch': 1} {'type': 'loss', 'content': 0.12201295047998428, 'timestamp': '2025-09-10 02:40:56.716603', 'step': 6045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:56.770226', 'step': 6045, 'epoch': 1} {'type': 'loss', 'content': 0.15741868317127228, 'timestamp': '2025-09-10 02:40:56.772434', 'step': 6046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:56.825783', 'step': 6046, 'epoch': 1} {'type': 'loss', 'content': 0.11258328706026077, 'timestamp': '2025-09-10 02:40:56.828185', 'step': 6047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:56.882932', 'step': 6047, 'epoch': 1} {'type': 'loss', 'content': 0.16758336126804352, 'timestamp': '2025-09-10 02:40:56.889627', 'step': 6048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:56.944055', 'step': 6048, 'epoch': 1} {'type': 'loss', 'content': 0.09041405469179153, 'timestamp': '2025-09-10 02:40:56.946497', 'step': 6049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:57.001516', 'step': 6049, 'epoch': 1} {'type': 'loss', 'content': 0.16738921403884888, 'timestamp': '2025-09-10 02:40:57.003981', 'step': 6050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:57.060329', 'step': 6050, 'epoch': 1} {'type': 'loss', 'content': 0.27423736453056335, 'timestamp': '2025-09-10 02:40:57.062640', 'step': 6051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:57.118000', 'step': 6051, 'epoch': 1} {'type': 'loss', 'content': 0.13015636801719666, 'timestamp': '2025-09-10 02:40:57.124455', 'step': 6052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:57.178625', 'step': 6052, 'epoch': 1} {'type': 'loss', 'content': 0.08577360957860947, 'timestamp': '2025-09-10 02:40:57.181074', 'step': 6053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:57.235619', 'step': 6053, 'epoch': 1} {'type': 'loss', 'content': 0.1090506985783577, 'timestamp': '2025-09-10 02:40:57.237962', 'step': 6054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:57.295238', 'step': 6054, 'epoch': 1} {'type': 'loss', 'content': 0.15526209771633148, 'timestamp': '2025-09-10 02:40:57.297710', 'step': 6055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:57.352773', 'step': 6055, 'epoch': 1} {'type': 'loss', 'content': 0.18891319632530212, 'timestamp': '2025-09-10 02:40:57.359305', 'step': 6056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:40:57.413782', 'step': 6056, 'epoch': 1} {'type': 'loss', 'content': 0.10805452615022659, 'timestamp': '2025-09-10 02:40:57.416253', 'step': 6057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:40:57.471457', 'step': 6057, 'epoch': 1} {'type': 'loss', 'content': 0.1604219377040863, 'timestamp': '2025-09-10 02:40:57.474174', 'step': 6058, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:41:10.284915', 'step': 6058, 'epoch': 1} {'type': 'pplx', 'content': 13656.505950026321, 'timestamp': '2025-09-10 02:41:10.288388', 'step': 6058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:10.343047', 'step': 6058, 'epoch': 1} {'type': 'loss', 'content': 0.30407997965812683, 'timestamp': '2025-09-10 02:41:10.346178', 'step': 6059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:10.403625', 'step': 6059, 'epoch': 1} {'type': 'loss', 'content': 0.18266461789608002, 'timestamp': '2025-09-10 02:41:10.410403', 'step': 6060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:10.463658', 'step': 6060, 'epoch': 1} {'type': 'loss', 'content': 0.18269282579421997, 'timestamp': '2025-09-10 02:41:10.466909', 'step': 6061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:10.523807', 'step': 6061, 'epoch': 1} {'type': 'loss', 'content': 0.19090186059474945, 'timestamp': '2025-09-10 02:41:10.525749', 'step': 6062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:10.580132', 'step': 6062, 'epoch': 1} {'type': 'loss', 'content': 0.07422363013029099, 'timestamp': '2025-09-10 02:41:10.582283', 'step': 6063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:10.645901', 'step': 6063, 'epoch': 1} {'type': 'loss', 'content': 0.09134560078382492, 'timestamp': '2025-09-10 02:41:10.651899', 'step': 6064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:10.705306', 'step': 6064, 'epoch': 1} {'type': 'loss', 'content': 0.23420386016368866, 'timestamp': '2025-09-10 02:41:10.707883', 'step': 6065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:10.762021', 'step': 6065, 'epoch': 1} {'type': 'loss', 'content': 0.09083377569913864, 'timestamp': '2025-09-10 02:41:10.764584', 'step': 6066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:10.820441', 'step': 6066, 'epoch': 1} {'type': 'loss', 'content': 0.18062198162078857, 'timestamp': '2025-09-10 02:41:10.822548', 'step': 6067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:10.881980', 'step': 6067, 'epoch': 1} {'type': 'loss', 'content': 0.11701586842536926, 'timestamp': '2025-09-10 02:41:10.888309', 'step': 6068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:10.946365', 'step': 6068, 'epoch': 1} {'type': 'loss', 'content': 0.17768634855747223, 'timestamp': '2025-09-10 02:41:10.948568', 'step': 6069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:11.006086', 'step': 6069, 'epoch': 1} {'type': 'loss', 'content': 0.12612135708332062, 'timestamp': '2025-09-10 02:41:11.008395', 'step': 6070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:11.061950', 'step': 6070, 'epoch': 1} {'type': 'loss', 'content': 0.21815332770347595, 'timestamp': '2025-09-10 02:41:11.064027', 'step': 6071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:11.117998', 'step': 6071, 'epoch': 1} {'type': 'loss', 'content': 0.137821763753891, 'timestamp': '2025-09-10 02:41:11.123882', 'step': 6072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:11.177318', 'step': 6072, 'epoch': 1} {'type': 'loss', 'content': 0.1010209396481514, 'timestamp': '2025-09-10 02:41:11.179632', 'step': 6073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:11.233361', 'step': 6073, 'epoch': 1} {'type': 'loss', 'content': 0.15453167259693146, 'timestamp': '2025-09-10 02:41:11.235705', 'step': 6074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:11.290607', 'step': 6074, 'epoch': 1} {'type': 'loss', 'content': 0.15095925331115723, 'timestamp': '2025-09-10 02:41:11.292940', 'step': 6075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:11.347492', 'step': 6075, 'epoch': 1} {'type': 'loss', 'content': 0.1921255886554718, 'timestamp': '2025-09-10 02:41:11.353835', 'step': 6076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:11.408633', 'step': 6076, 'epoch': 1} {'type': 'loss', 'content': 0.09999194741249084, 'timestamp': '2025-09-10 02:41:11.410971', 'step': 6077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:11.464201', 'step': 6077, 'epoch': 1} {'type': 'loss', 'content': 0.09719472378492355, 'timestamp': '2025-09-10 02:41:11.466515', 'step': 6078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:11.520474', 'step': 6078, 'epoch': 1} {'type': 'loss', 'content': 0.14621329307556152, 'timestamp': '2025-09-10 02:41:11.522728', 'step': 6079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:11.577025', 'step': 6079, 'epoch': 1} {'type': 'loss', 'content': 0.19031767547130585, 'timestamp': '2025-09-10 02:41:11.583264', 'step': 6080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:11.637113', 'step': 6080, 'epoch': 1} {'type': 'loss', 'content': 0.21656787395477295, 'timestamp': '2025-09-10 02:41:11.639311', 'step': 6081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:11.693714', 'step': 6081, 'epoch': 1} {'type': 'loss', 'content': 0.09080199897289276, 'timestamp': '2025-09-10 02:41:11.696011', 'step': 6082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:11.750521', 'step': 6082, 'epoch': 1} {'type': 'loss', 'content': 0.1430732011795044, 'timestamp': '2025-09-10 02:41:11.753048', 'step': 6083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:11.807839', 'step': 6083, 'epoch': 1} {'type': 'loss', 'content': 0.12313655018806458, 'timestamp': '2025-09-10 02:41:11.814305', 'step': 6084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:11.868511', 'step': 6084, 'epoch': 1} {'type': 'loss', 'content': 0.20692797005176544, 'timestamp': '2025-09-10 02:41:11.870921', 'step': 6085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:11.926560', 'step': 6085, 'epoch': 1} {'type': 'loss', 'content': 0.2000155746936798, 'timestamp': '2025-09-10 02:41:11.928877', 'step': 6086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:11.982923', 'step': 6086, 'epoch': 1} {'type': 'loss', 'content': 0.34786203503608704, 'timestamp': '2025-09-10 02:41:11.985295', 'step': 6087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:12.039701', 'step': 6087, 'epoch': 1} {'type': 'loss', 'content': 0.20751401782035828, 'timestamp': '2025-09-10 02:41:12.045934', 'step': 6088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:12.099395', 'step': 6088, 'epoch': 1} {'type': 'loss', 'content': 0.09479156881570816, 'timestamp': '2025-09-10 02:41:12.101507', 'step': 6089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:12.155171', 'step': 6089, 'epoch': 1} {'type': 'loss', 'content': 0.20322605967521667, 'timestamp': '2025-09-10 02:41:12.157709', 'step': 6090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:12.215580', 'step': 6090, 'epoch': 1} {'type': 'loss', 'content': 0.1442634016275406, 'timestamp': '2025-09-10 02:41:12.217577', 'step': 6091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:12.271661', 'step': 6091, 'epoch': 1} {'type': 'loss', 'content': 0.21239854395389557, 'timestamp': '2025-09-10 02:41:12.278139', 'step': 6092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:12.332112', 'step': 6092, 'epoch': 1} {'type': 'loss', 'content': 0.13407719135284424, 'timestamp': '2025-09-10 02:41:12.334411', 'step': 6093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:12.389699', 'step': 6093, 'epoch': 1} {'type': 'loss', 'content': 0.1069011390209198, 'timestamp': '2025-09-10 02:41:12.391980', 'step': 6094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:12.446579', 'step': 6094, 'epoch': 1} {'type': 'loss', 'content': 0.2586403787136078, 'timestamp': '2025-09-10 02:41:12.449097', 'step': 6095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:12.503068', 'step': 6095, 'epoch': 1} {'type': 'loss', 'content': 0.23440097272396088, 'timestamp': '2025-09-10 02:41:12.509797', 'step': 6096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:12.564017', 'step': 6096, 'epoch': 1} {'type': 'loss', 'content': 0.12800690531730652, 'timestamp': '2025-09-10 02:41:12.566700', 'step': 6097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:12.621936', 'step': 6097, 'epoch': 1} {'type': 'loss', 'content': 0.11217541247606277, 'timestamp': '2025-09-10 02:41:12.624429', 'step': 6098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:12.678631', 'step': 6098, 'epoch': 1} {'type': 'loss', 'content': 0.1449999362230301, 'timestamp': '2025-09-10 02:41:12.680982', 'step': 6099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:12.735704', 'step': 6099, 'epoch': 1} {'type': 'loss', 'content': 0.10659055411815643, 'timestamp': '2025-09-10 02:41:12.741871', 'step': 6100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:12.795869', 'step': 6100, 'epoch': 1} {'type': 'loss', 'content': 0.13149535655975342, 'timestamp': '2025-09-10 02:41:12.798285', 'step': 6101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:12.852426', 'step': 6101, 'epoch': 1} {'type': 'loss', 'content': 0.13859376311302185, 'timestamp': '2025-09-10 02:41:12.854804', 'step': 6102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:12.909774', 'step': 6102, 'epoch': 1} {'type': 'loss', 'content': 0.13909882307052612, 'timestamp': '2025-09-10 02:41:12.912200', 'step': 6103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:12.965913', 'step': 6103, 'epoch': 1} {'type': 'loss', 'content': 0.1725502610206604, 'timestamp': '2025-09-10 02:41:12.972289', 'step': 6104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:13.025723', 'step': 6104, 'epoch': 1} {'type': 'loss', 'content': 0.15937285125255585, 'timestamp': '2025-09-10 02:41:13.028169', 'step': 6105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:13.082263', 'step': 6105, 'epoch': 1} {'type': 'loss', 'content': 0.13769614696502686, 'timestamp': '2025-09-10 02:41:13.084444', 'step': 6106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:13.138413', 'step': 6106, 'epoch': 1} {'type': 'loss', 'content': 0.20736002922058105, 'timestamp': '2025-09-10 02:41:13.140740', 'step': 6107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:13.195207', 'step': 6107, 'epoch': 1} {'type': 'loss', 'content': 0.20044024288654327, 'timestamp': '2025-09-10 02:41:13.201423', 'step': 6108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:13.254831', 'step': 6108, 'epoch': 1} {'type': 'loss', 'content': 0.17862294614315033, 'timestamp': '2025-09-10 02:41:13.257061', 'step': 6109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:13.310625', 'step': 6109, 'epoch': 1} {'type': 'loss', 'content': 0.13270972669124603, 'timestamp': '2025-09-10 02:41:13.312876', 'step': 6110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:13.366754', 'step': 6110, 'epoch': 1} {'type': 'loss', 'content': 0.1631440818309784, 'timestamp': '2025-09-10 02:41:13.369049', 'step': 6111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:13.424010', 'step': 6111, 'epoch': 1} {'type': 'loss', 'content': 0.16277211904525757, 'timestamp': '2025-09-10 02:41:13.429959', 'step': 6112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:13.484078', 'step': 6112, 'epoch': 1} {'type': 'loss', 'content': 0.14126412570476532, 'timestamp': '2025-09-10 02:41:13.486310', 'step': 6113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:13.543819', 'step': 6113, 'epoch': 1} {'type': 'loss', 'content': 0.13090692460536957, 'timestamp': '2025-09-10 02:41:13.546080', 'step': 6114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:13.602489', 'step': 6114, 'epoch': 1} {'type': 'loss', 'content': 0.11686789244413376, 'timestamp': '2025-09-10 02:41:13.604658', 'step': 6115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:13.658223', 'step': 6115, 'epoch': 1} {'type': 'loss', 'content': 0.19236540794372559, 'timestamp': '2025-09-10 02:41:13.664296', 'step': 6116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:13.717849', 'step': 6116, 'epoch': 1} {'type': 'loss', 'content': 0.1868070662021637, 'timestamp': '2025-09-10 02:41:13.719971', 'step': 6117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:13.773654', 'step': 6117, 'epoch': 1} {'type': 'loss', 'content': 0.12515558302402496, 'timestamp': '2025-09-10 02:41:13.775774', 'step': 6118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:13.830649', 'step': 6118, 'epoch': 1} {'type': 'loss', 'content': 0.158370241522789, 'timestamp': '2025-09-10 02:41:13.832867', 'step': 6119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:13.887695', 'step': 6119, 'epoch': 1} {'type': 'loss', 'content': 0.1934656947851181, 'timestamp': '2025-09-10 02:41:13.893882', 'step': 6120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:13.947308', 'step': 6120, 'epoch': 1} {'type': 'loss', 'content': 0.10778425633907318, 'timestamp': '2025-09-10 02:41:13.949521', 'step': 6121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:14.005016', 'step': 6121, 'epoch': 1} {'type': 'loss', 'content': 0.193302720785141, 'timestamp': '2025-09-10 02:41:14.007195', 'step': 6122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:14.061239', 'step': 6122, 'epoch': 1} {'type': 'loss', 'content': 0.20475724339485168, 'timestamp': '2025-09-10 02:41:14.063458', 'step': 6123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:14.117536', 'step': 6123, 'epoch': 1} {'type': 'loss', 'content': 0.1074756607413292, 'timestamp': '2025-09-10 02:41:14.123436', 'step': 6124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:14.177094', 'step': 6124, 'epoch': 1} {'type': 'loss', 'content': 0.276505708694458, 'timestamp': '2025-09-10 02:41:14.179472', 'step': 6125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:14.233798', 'step': 6125, 'epoch': 1} {'type': 'loss', 'content': 0.19528092443943024, 'timestamp': '2025-09-10 02:41:14.236069', 'step': 6126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:14.290810', 'step': 6126, 'epoch': 1} {'type': 'loss', 'content': 0.09000661224126816, 'timestamp': '2025-09-10 02:41:14.292986', 'step': 6127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:14.348403', 'step': 6127, 'epoch': 1} {'type': 'loss', 'content': 0.1893371343612671, 'timestamp': '2025-09-10 02:41:14.354461', 'step': 6128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:14.408052', 'step': 6128, 'epoch': 1} {'type': 'loss', 'content': 0.12901577353477478, 'timestamp': '2025-09-10 02:41:14.410186', 'step': 6129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:14.464457', 'step': 6129, 'epoch': 1} {'type': 'loss', 'content': 0.16589216887950897, 'timestamp': '2025-09-10 02:41:14.466790', 'step': 6130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:14.521643', 'step': 6130, 'epoch': 1} {'type': 'loss', 'content': 0.10413143038749695, 'timestamp': '2025-09-10 02:41:14.523894', 'step': 6131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:14.577828', 'step': 6131, 'epoch': 1} {'type': 'loss', 'content': 0.3044709265232086, 'timestamp': '2025-09-10 02:41:14.584058', 'step': 6132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:14.637618', 'step': 6132, 'epoch': 1} {'type': 'loss', 'content': 0.15560923516750336, 'timestamp': '2025-09-10 02:41:14.642284', 'step': 6133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:14.695769', 'step': 6133, 'epoch': 1} {'type': 'loss', 'content': 0.11422248184680939, 'timestamp': '2025-09-10 02:41:14.697890', 'step': 6134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:14.753617', 'step': 6134, 'epoch': 1} {'type': 'loss', 'content': 0.185357928276062, 'timestamp': '2025-09-10 02:41:14.755760', 'step': 6135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:14.809411', 'step': 6135, 'epoch': 1} {'type': 'loss', 'content': 0.12919476628303528, 'timestamp': '2025-09-10 02:41:14.815565', 'step': 6136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:14.868862', 'step': 6136, 'epoch': 1} {'type': 'loss', 'content': 0.15719494223594666, 'timestamp': '2025-09-10 02:41:14.871025', 'step': 6137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:14.925485', 'step': 6137, 'epoch': 1} {'type': 'loss', 'content': 0.23925283551216125, 'timestamp': '2025-09-10 02:41:14.927478', 'step': 6138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:14.982017', 'step': 6138, 'epoch': 1} {'type': 'loss', 'content': 0.22955092787742615, 'timestamp': '2025-09-10 02:41:14.984224', 'step': 6139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:15.039221', 'step': 6139, 'epoch': 1} {'type': 'loss', 'content': 0.13330909609794617, 'timestamp': '2025-09-10 02:41:15.045877', 'step': 6140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:15.100660', 'step': 6140, 'epoch': 1} {'type': 'loss', 'content': 0.1415501832962036, 'timestamp': '2025-09-10 02:41:15.102857', 'step': 6141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:15.158879', 'step': 6141, 'epoch': 1} {'type': 'loss', 'content': 0.25637757778167725, 'timestamp': '2025-09-10 02:41:15.161016', 'step': 6142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:15.215474', 'step': 6142, 'epoch': 1} {'type': 'loss', 'content': 0.20454376935958862, 'timestamp': '2025-09-10 02:41:15.217574', 'step': 6143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:15.271755', 'step': 6143, 'epoch': 1} {'type': 'loss', 'content': 0.10932651162147522, 'timestamp': '2025-09-10 02:41:15.277952', 'step': 6144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:15.331903', 'step': 6144, 'epoch': 1} {'type': 'loss', 'content': 0.213389053940773, 'timestamp': '2025-09-10 02:41:15.334097', 'step': 6145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:15.389517', 'step': 6145, 'epoch': 1} {'type': 'loss', 'content': 0.16038905084133148, 'timestamp': '2025-09-10 02:41:15.391477', 'step': 6146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:15.446087', 'step': 6146, 'epoch': 1} {'type': 'loss', 'content': 0.18999819457530975, 'timestamp': '2025-09-10 02:41:15.448109', 'step': 6147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:15.502459', 'step': 6147, 'epoch': 1} {'type': 'loss', 'content': 0.16095812618732452, 'timestamp': '2025-09-10 02:41:15.508492', 'step': 6148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:15.563906', 'step': 6148, 'epoch': 1} {'type': 'loss', 'content': 0.14140412211418152, 'timestamp': '2025-09-10 02:41:15.566116', 'step': 6149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:15.621433', 'step': 6149, 'epoch': 1} {'type': 'loss', 'content': 0.17520540952682495, 'timestamp': '2025-09-10 02:41:15.623374', 'step': 6150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:15.678255', 'step': 6150, 'epoch': 1} {'type': 'loss', 'content': 0.19426380097866058, 'timestamp': '2025-09-10 02:41:15.680214', 'step': 6151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:15.734507', 'step': 6151, 'epoch': 1} {'type': 'loss', 'content': 0.17888441681861877, 'timestamp': '2025-09-10 02:41:15.740699', 'step': 6152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:15.795605', 'step': 6152, 'epoch': 1} {'type': 'loss', 'content': 0.254741907119751, 'timestamp': '2025-09-10 02:41:15.797628', 'step': 6153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:15.852116', 'step': 6153, 'epoch': 1} {'type': 'loss', 'content': 0.11356636136770248, 'timestamp': '2025-09-10 02:41:15.854324', 'step': 6154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:15.910266', 'step': 6154, 'epoch': 1} {'type': 'loss', 'content': 0.18959200382232666, 'timestamp': '2025-09-10 02:41:15.912458', 'step': 6155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:15.971566', 'step': 6155, 'epoch': 1} {'type': 'loss', 'content': 0.19345536828041077, 'timestamp': '2025-09-10 02:41:15.978434', 'step': 6156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:16.035307', 'step': 6156, 'epoch': 1} {'type': 'loss', 'content': 0.16089576482772827, 'timestamp': '2025-09-10 02:41:16.037333', 'step': 6157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:16.095299', 'step': 6157, 'epoch': 1} {'type': 'loss', 'content': 0.31802716851234436, 'timestamp': '2025-09-10 02:41:16.097467', 'step': 6158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:16.154212', 'step': 6158, 'epoch': 1} {'type': 'loss', 'content': 0.1772952526807785, 'timestamp': '2025-09-10 02:41:16.156485', 'step': 6159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:16.212924', 'step': 6159, 'epoch': 1} {'type': 'loss', 'content': 0.11844094842672348, 'timestamp': '2025-09-10 02:41:16.219420', 'step': 6160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:16.276507', 'step': 6160, 'epoch': 1} {'type': 'loss', 'content': 0.11053167283535004, 'timestamp': '2025-09-10 02:41:16.278684', 'step': 6161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:16.333655', 'step': 6161, 'epoch': 1} {'type': 'loss', 'content': 0.16248837113380432, 'timestamp': '2025-09-10 02:41:16.335747', 'step': 6162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:16.390915', 'step': 6162, 'epoch': 1} {'type': 'loss', 'content': 0.11077012866735458, 'timestamp': '2025-09-10 02:41:16.392837', 'step': 6163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:16.447864', 'step': 6163, 'epoch': 1} {'type': 'loss', 'content': 0.15450534224510193, 'timestamp': '2025-09-10 02:41:16.453980', 'step': 6164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:16.508104', 'step': 6164, 'epoch': 1} {'type': 'loss', 'content': 0.1811540573835373, 'timestamp': '2025-09-10 02:41:16.510004', 'step': 6165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:16.564116', 'step': 6165, 'epoch': 1} {'type': 'loss', 'content': 0.09598854929208755, 'timestamp': '2025-09-10 02:41:16.566063', 'step': 6166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:16.621175', 'step': 6166, 'epoch': 1} {'type': 'loss', 'content': 0.1761641651391983, 'timestamp': '2025-09-10 02:41:16.623507', 'step': 6167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:16.679164', 'step': 6167, 'epoch': 1} {'type': 'loss', 'content': 0.14091837406158447, 'timestamp': '2025-09-10 02:41:16.685602', 'step': 6168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:16.740762', 'step': 6168, 'epoch': 1} {'type': 'loss', 'content': 0.18950927257537842, 'timestamp': '2025-09-10 02:41:16.742853', 'step': 6169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:16.797935', 'step': 6169, 'epoch': 1} {'type': 'loss', 'content': 0.12412603944540024, 'timestamp': '2025-09-10 02:41:16.800065', 'step': 6170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:16.854215', 'step': 6170, 'epoch': 1} {'type': 'loss', 'content': 0.17182449996471405, 'timestamp': '2025-09-10 02:41:16.856166', 'step': 6171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:16.910412', 'step': 6171, 'epoch': 1} {'type': 'loss', 'content': 0.13997098803520203, 'timestamp': '2025-09-10 02:41:16.916575', 'step': 6172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:16.972316', 'step': 6172, 'epoch': 1} {'type': 'loss', 'content': 0.1241823136806488, 'timestamp': '2025-09-10 02:41:16.974306', 'step': 6173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:17.028957', 'step': 6173, 'epoch': 1} {'type': 'loss', 'content': 0.22953644394874573, 'timestamp': '2025-09-10 02:41:17.030898', 'step': 6174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:17.086743', 'step': 6174, 'epoch': 1} {'type': 'loss', 'content': 0.15649782121181488, 'timestamp': '2025-09-10 02:41:17.088829', 'step': 6175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:17.144777', 'step': 6175, 'epoch': 1} {'type': 'loss', 'content': 0.16204102337360382, 'timestamp': '2025-09-10 02:41:17.155504', 'step': 6176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:17.213014', 'step': 6176, 'epoch': 1} {'type': 'loss', 'content': 0.1638210415840149, 'timestamp': '2025-09-10 02:41:17.215185', 'step': 6177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:17.271252', 'step': 6177, 'epoch': 1} {'type': 'loss', 'content': 0.23546499013900757, 'timestamp': '2025-09-10 02:41:17.273579', 'step': 6178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:17.328058', 'step': 6178, 'epoch': 1} {'type': 'loss', 'content': 0.1339033991098404, 'timestamp': '2025-09-10 02:41:17.330051', 'step': 6179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:17.387638', 'step': 6179, 'epoch': 1} {'type': 'loss', 'content': 0.2089640200138092, 'timestamp': '2025-09-10 02:41:17.393828', 'step': 6180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:17.447701', 'step': 6180, 'epoch': 1} {'type': 'loss', 'content': 0.1391569972038269, 'timestamp': '2025-09-10 02:41:17.449946', 'step': 6181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:17.504085', 'step': 6181, 'epoch': 1} {'type': 'loss', 'content': 0.13567332923412323, 'timestamp': '2025-09-10 02:41:17.506397', 'step': 6182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:17.561082', 'step': 6182, 'epoch': 1} {'type': 'loss', 'content': 0.16809700429439545, 'timestamp': '2025-09-10 02:41:17.563395', 'step': 6183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:17.618253', 'step': 6183, 'epoch': 1} {'type': 'loss', 'content': 0.1917947381734848, 'timestamp': '2025-09-10 02:41:17.625023', 'step': 6184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:17.679327', 'step': 6184, 'epoch': 1} {'type': 'loss', 'content': 0.1636856347322464, 'timestamp': '2025-09-10 02:41:17.682082', 'step': 6185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:17.737971', 'step': 6185, 'epoch': 1} {'type': 'loss', 'content': 0.13600040972232819, 'timestamp': '2025-09-10 02:41:17.740122', 'step': 6186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:17.797740', 'step': 6186, 'epoch': 1} {'type': 'loss', 'content': 0.23006704449653625, 'timestamp': '2025-09-10 02:41:17.799930', 'step': 6187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:17.854716', 'step': 6187, 'epoch': 1} {'type': 'loss', 'content': 0.1685345619916916, 'timestamp': '2025-09-10 02:41:17.860928', 'step': 6188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:17.914745', 'step': 6188, 'epoch': 1} {'type': 'loss', 'content': 0.08407182991504669, 'timestamp': '2025-09-10 02:41:17.916891', 'step': 6189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:17.972227', 'step': 6189, 'epoch': 1} {'type': 'loss', 'content': 0.19635790586471558, 'timestamp': '2025-09-10 02:41:17.974428', 'step': 6190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:18.029321', 'step': 6190, 'epoch': 1} {'type': 'loss', 'content': 0.10524354875087738, 'timestamp': '2025-09-10 02:41:18.031390', 'step': 6191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:18.085978', 'step': 6191, 'epoch': 1} {'type': 'loss', 'content': 0.24804233014583588, 'timestamp': '2025-09-10 02:41:18.093358', 'step': 6192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:18.148835', 'step': 6192, 'epoch': 1} {'type': 'loss', 'content': 0.1941811740398407, 'timestamp': '2025-09-10 02:41:18.153609', 'step': 6193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:18.216784', 'step': 6193, 'epoch': 1} {'type': 'loss', 'content': 0.20884919166564941, 'timestamp': '2025-09-10 02:41:18.219087', 'step': 6194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:18.276088', 'step': 6194, 'epoch': 1} {'type': 'loss', 'content': 0.17362181842327118, 'timestamp': '2025-09-10 02:41:18.278345', 'step': 6195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:18.333189', 'step': 6195, 'epoch': 1} {'type': 'loss', 'content': 0.09385282546281815, 'timestamp': '2025-09-10 02:41:18.339293', 'step': 6196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:18.393385', 'step': 6196, 'epoch': 1} {'type': 'loss', 'content': 0.17259559035301208, 'timestamp': '2025-09-10 02:41:18.395696', 'step': 6197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:18.449904', 'step': 6197, 'epoch': 1} {'type': 'loss', 'content': 0.09605906158685684, 'timestamp': '2025-09-10 02:41:18.452297', 'step': 6198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:18.510875', 'step': 6198, 'epoch': 1} {'type': 'loss', 'content': 0.12211992591619492, 'timestamp': '2025-09-10 02:41:18.513071', 'step': 6199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:18.567161', 'step': 6199, 'epoch': 1} {'type': 'loss', 'content': 0.12588387727737427, 'timestamp': '2025-09-10 02:41:18.573323', 'step': 6200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:18.626794', 'step': 6200, 'epoch': 1} {'type': 'loss', 'content': 0.23519770801067352, 'timestamp': '2025-09-10 02:41:18.628691', 'step': 6201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:18.682377', 'step': 6201, 'epoch': 1} {'type': 'loss', 'content': 0.12817375361919403, 'timestamp': '2025-09-10 02:41:18.684460', 'step': 6202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:18.739167', 'step': 6202, 'epoch': 1} {'type': 'loss', 'content': 0.16119885444641113, 'timestamp': '2025-09-10 02:41:18.741307', 'step': 6203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:18.795782', 'step': 6203, 'epoch': 1} {'type': 'loss', 'content': 0.16464583575725555, 'timestamp': '2025-09-10 02:41:18.801963', 'step': 6204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:18.856263', 'step': 6204, 'epoch': 1} {'type': 'loss', 'content': 0.13981060683727264, 'timestamp': '2025-09-10 02:41:18.858464', 'step': 6205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:18.912495', 'step': 6205, 'epoch': 1} {'type': 'loss', 'content': 0.2541671097278595, 'timestamp': '2025-09-10 02:41:18.914694', 'step': 6206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:18.969084', 'step': 6206, 'epoch': 1} {'type': 'loss', 'content': 0.12475263327360153, 'timestamp': '2025-09-10 02:41:18.971326', 'step': 6207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:19.024655', 'step': 6207, 'epoch': 1} {'type': 'loss', 'content': 0.14984573423862457, 'timestamp': '2025-09-10 02:41:19.030669', 'step': 6208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:19.083028', 'step': 6208, 'epoch': 1} {'type': 'loss', 'content': 0.2122572362422943, 'timestamp': '2025-09-10 02:41:19.085169', 'step': 6209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:19.139393', 'step': 6209, 'epoch': 1} {'type': 'loss', 'content': 0.18943151831626892, 'timestamp': '2025-09-10 02:41:19.141568', 'step': 6210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:19.197856', 'step': 6210, 'epoch': 1} {'type': 'loss', 'content': 0.19627094268798828, 'timestamp': '2025-09-10 02:41:19.200322', 'step': 6211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:19.254412', 'step': 6211, 'epoch': 1} {'type': 'loss', 'content': 0.1705159991979599, 'timestamp': '2025-09-10 02:41:19.260786', 'step': 6212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:19.315058', 'step': 6212, 'epoch': 1} {'type': 'loss', 'content': 0.17388682067394257, 'timestamp': '2025-09-10 02:41:19.317404', 'step': 6213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:19.371502', 'step': 6213, 'epoch': 1} {'type': 'loss', 'content': 0.13223236799240112, 'timestamp': '2025-09-10 02:41:19.373594', 'step': 6214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:19.428026', 'step': 6214, 'epoch': 1} {'type': 'loss', 'content': 0.08790584653615952, 'timestamp': '2025-09-10 02:41:19.430199', 'step': 6215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:19.483930', 'step': 6215, 'epoch': 1} {'type': 'loss', 'content': 0.1486441045999527, 'timestamp': '2025-09-10 02:41:19.490012', 'step': 6216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:19.543359', 'step': 6216, 'epoch': 1} {'type': 'loss', 'content': 0.16745515167713165, 'timestamp': '2025-09-10 02:41:19.545469', 'step': 6217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:19.598710', 'step': 6217, 'epoch': 1} {'type': 'loss', 'content': 0.2515582740306854, 'timestamp': '2025-09-10 02:41:19.600814', 'step': 6218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:19.654205', 'step': 6218, 'epoch': 1} {'type': 'loss', 'content': 0.3341398239135742, 'timestamp': '2025-09-10 02:41:19.656111', 'step': 6219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:19.709399', 'step': 6219, 'epoch': 1} {'type': 'loss', 'content': 0.11968595534563065, 'timestamp': '2025-09-10 02:41:19.715285', 'step': 6220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:19.769233', 'step': 6220, 'epoch': 1} {'type': 'loss', 'content': 0.13271717727184296, 'timestamp': '2025-09-10 02:41:19.771354', 'step': 6221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:19.824918', 'step': 6221, 'epoch': 1} {'type': 'loss', 'content': 0.17495861649513245, 'timestamp': '2025-09-10 02:41:19.827452', 'step': 6222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:19.881785', 'step': 6222, 'epoch': 1} {'type': 'loss', 'content': 0.09013094753026962, 'timestamp': '2025-09-10 02:41:19.883806', 'step': 6223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:19.937701', 'step': 6223, 'epoch': 1} {'type': 'loss', 'content': 0.1509096920490265, 'timestamp': '2025-09-10 02:41:19.945289', 'step': 6224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:19.999989', 'step': 6224, 'epoch': 1} {'type': 'loss', 'content': 0.11089054495096207, 'timestamp': '2025-09-10 02:41:20.002144', 'step': 6225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:20.055958', 'step': 6225, 'epoch': 1} {'type': 'loss', 'content': 0.14352397620677948, 'timestamp': '2025-09-10 02:41:20.058285', 'step': 6226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:20.112820', 'step': 6226, 'epoch': 1} {'type': 'loss', 'content': 0.21659010648727417, 'timestamp': '2025-09-10 02:41:20.114929', 'step': 6227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:20.169005', 'step': 6227, 'epoch': 1} {'type': 'loss', 'content': 0.27716490626335144, 'timestamp': '2025-09-10 02:41:20.175182', 'step': 6228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:20.228413', 'step': 6228, 'epoch': 1} {'type': 'loss', 'content': 0.13151295483112335, 'timestamp': '2025-09-10 02:41:20.230625', 'step': 6229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:20.284264', 'step': 6229, 'epoch': 1} {'type': 'loss', 'content': 0.14156168699264526, 'timestamp': '2025-09-10 02:41:20.286303', 'step': 6230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:20.340417', 'step': 6230, 'epoch': 1} {'type': 'loss', 'content': 0.1282883584499359, 'timestamp': '2025-09-10 02:41:20.342192', 'step': 6231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:20.395991', 'step': 6231, 'epoch': 1} {'type': 'loss', 'content': 0.2541455328464508, 'timestamp': '2025-09-10 02:41:20.401987', 'step': 6232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:20.456423', 'step': 6232, 'epoch': 1} {'type': 'loss', 'content': 0.11635828018188477, 'timestamp': '2025-09-10 02:41:20.458475', 'step': 6233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:20.511850', 'step': 6233, 'epoch': 1} {'type': 'loss', 'content': 0.1608622968196869, 'timestamp': '2025-09-10 02:41:20.513626', 'step': 6234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:20.567134', 'step': 6234, 'epoch': 1} {'type': 'loss', 'content': 0.17911207675933838, 'timestamp': '2025-09-10 02:41:20.568961', 'step': 6235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:20.622141', 'step': 6235, 'epoch': 1} {'type': 'loss', 'content': 0.14014932513237, 'timestamp': '2025-09-10 02:41:20.628103', 'step': 6236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:20.681343', 'step': 6236, 'epoch': 1} {'type': 'loss', 'content': 0.13850142061710358, 'timestamp': '2025-09-10 02:41:20.683591', 'step': 6237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:20.737905', 'step': 6237, 'epoch': 1} {'type': 'loss', 'content': 0.08864042162895203, 'timestamp': '2025-09-10 02:41:20.739944', 'step': 6238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:20.794963', 'step': 6238, 'epoch': 1} {'type': 'loss', 'content': 0.24132896959781647, 'timestamp': '2025-09-10 02:41:20.797114', 'step': 6239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:20.853665', 'step': 6239, 'epoch': 1} {'type': 'loss', 'content': 0.1846799999475479, 'timestamp': '2025-09-10 02:41:20.859896', 'step': 6240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:20.913958', 'step': 6240, 'epoch': 1} {'type': 'loss', 'content': 0.16222256422042847, 'timestamp': '2025-09-10 02:41:20.916243', 'step': 6241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:20.971020', 'step': 6241, 'epoch': 1} {'type': 'loss', 'content': 0.2420041412115097, 'timestamp': '2025-09-10 02:41:20.973470', 'step': 6242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:21.027675', 'step': 6242, 'epoch': 1} {'type': 'loss', 'content': 0.15049102902412415, 'timestamp': '2025-09-10 02:41:21.029951', 'step': 6243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:21.083852', 'step': 6243, 'epoch': 1} {'type': 'loss', 'content': 0.18415145576000214, 'timestamp': '2025-09-10 02:41:21.090084', 'step': 6244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:21.144711', 'step': 6244, 'epoch': 1} {'type': 'loss', 'content': 0.07605131715536118, 'timestamp': '2025-09-10 02:41:21.146790', 'step': 6245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:21.201935', 'step': 6245, 'epoch': 1} {'type': 'loss', 'content': 0.1305641531944275, 'timestamp': '2025-09-10 02:41:21.204108', 'step': 6246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:21.258958', 'step': 6246, 'epoch': 1} {'type': 'loss', 'content': 0.23448176681995392, 'timestamp': '2025-09-10 02:41:21.261263', 'step': 6247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:21.316290', 'step': 6247, 'epoch': 1} {'type': 'loss', 'content': 0.14476920664310455, 'timestamp': '2025-09-10 02:41:21.322572', 'step': 6248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:21.378143', 'step': 6248, 'epoch': 1} {'type': 'loss', 'content': 0.13943655788898468, 'timestamp': '2025-09-10 02:41:21.380443', 'step': 6249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:21.435541', 'step': 6249, 'epoch': 1} {'type': 'loss', 'content': 0.18583445250988007, 'timestamp': '2025-09-10 02:41:21.437512', 'step': 6250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:21.495931', 'step': 6250, 'epoch': 1} {'type': 'loss', 'content': 0.12784145772457123, 'timestamp': '2025-09-10 02:41:21.498157', 'step': 6251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:21.553083', 'step': 6251, 'epoch': 1} {'type': 'loss', 'content': 0.11545010656118393, 'timestamp': '2025-09-10 02:41:21.559531', 'step': 6252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:21.613754', 'step': 6252, 'epoch': 1} {'type': 'loss', 'content': 0.08774976432323456, 'timestamp': '2025-09-10 02:41:21.615918', 'step': 6253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:21.669648', 'step': 6253, 'epoch': 1} {'type': 'loss', 'content': 0.22292210161685944, 'timestamp': '2025-09-10 02:41:21.671978', 'step': 6254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:21.726366', 'step': 6254, 'epoch': 1} {'type': 'loss', 'content': 0.28570079803466797, 'timestamp': '2025-09-10 02:41:21.728851', 'step': 6255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:21.783546', 'step': 6255, 'epoch': 1} {'type': 'loss', 'content': 0.12504324316978455, 'timestamp': '2025-09-10 02:41:21.789890', 'step': 6256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:21.843669', 'step': 6256, 'epoch': 1} {'type': 'loss', 'content': 0.12892384827136993, 'timestamp': '2025-09-10 02:41:21.845941', 'step': 6257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:21.899795', 'step': 6257, 'epoch': 1} {'type': 'loss', 'content': 0.14748935401439667, 'timestamp': '2025-09-10 02:41:21.902016', 'step': 6258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:21.957513', 'step': 6258, 'epoch': 1} {'type': 'loss', 'content': 0.2012942135334015, 'timestamp': '2025-09-10 02:41:21.959636', 'step': 6259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:22.014798', 'step': 6259, 'epoch': 1} {'type': 'loss', 'content': 0.18608473241329193, 'timestamp': '2025-09-10 02:41:22.020984', 'step': 6260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:22.076233', 'step': 6260, 'epoch': 1} {'type': 'loss', 'content': 0.08982435613870621, 'timestamp': '2025-09-10 02:41:22.078439', 'step': 6261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:22.133726', 'step': 6261, 'epoch': 1} {'type': 'loss', 'content': 0.08235253393650055, 'timestamp': '2025-09-10 02:41:22.136015', 'step': 6262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:22.191184', 'step': 6262, 'epoch': 1} {'type': 'loss', 'content': 0.1953008472919464, 'timestamp': '2025-09-10 02:41:22.193483', 'step': 6263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:22.249431', 'step': 6263, 'epoch': 1} {'type': 'loss', 'content': 0.14185170829296112, 'timestamp': '2025-09-10 02:41:22.255558', 'step': 6264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:22.309031', 'step': 6264, 'epoch': 1} {'type': 'loss', 'content': 0.13907413184642792, 'timestamp': '2025-09-10 02:41:22.311196', 'step': 6265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:22.365591', 'step': 6265, 'epoch': 1} {'type': 'loss', 'content': 0.20382355153560638, 'timestamp': '2025-09-10 02:41:22.367759', 'step': 6266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:22.422563', 'step': 6266, 'epoch': 1} {'type': 'loss', 'content': 0.13570623099803925, 'timestamp': '2025-09-10 02:41:22.424798', 'step': 6267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:22.479820', 'step': 6267, 'epoch': 1} {'type': 'loss', 'content': 0.15558764338493347, 'timestamp': '2025-09-10 02:41:22.485923', 'step': 6268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:22.540181', 'step': 6268, 'epoch': 1} {'type': 'loss', 'content': 0.17906534671783447, 'timestamp': '2025-09-10 02:41:22.542320', 'step': 6269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:22.597101', 'step': 6269, 'epoch': 1} {'type': 'loss', 'content': 0.19063018262386322, 'timestamp': '2025-09-10 02:41:22.599501', 'step': 6270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:22.655292', 'step': 6270, 'epoch': 1} {'type': 'loss', 'content': 0.19225558638572693, 'timestamp': '2025-09-10 02:41:22.657431', 'step': 6271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:22.711779', 'step': 6271, 'epoch': 1} {'type': 'loss', 'content': 0.15034443140029907, 'timestamp': '2025-09-10 02:41:22.717900', 'step': 6272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:22.772072', 'step': 6272, 'epoch': 1} {'type': 'loss', 'content': 0.1327980011701584, 'timestamp': '2025-09-10 02:41:22.774303', 'step': 6273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:22.829415', 'step': 6273, 'epoch': 1} {'type': 'loss', 'content': 0.1733672022819519, 'timestamp': '2025-09-10 02:41:22.831945', 'step': 6274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:22.886439', 'step': 6274, 'epoch': 1} {'type': 'loss', 'content': 0.13241781294345856, 'timestamp': '2025-09-10 02:41:22.888652', 'step': 6275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:22.943988', 'step': 6275, 'epoch': 1} {'type': 'loss', 'content': 0.24228225648403168, 'timestamp': '2025-09-10 02:41:22.950048', 'step': 6276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:23.004504', 'step': 6276, 'epoch': 1} {'type': 'loss', 'content': 0.11951931565999985, 'timestamp': '2025-09-10 02:41:23.006491', 'step': 6277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:23.062080', 'step': 6277, 'epoch': 1} {'type': 'loss', 'content': 0.20846305787563324, 'timestamp': '2025-09-10 02:41:23.064195', 'step': 6278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:23.122281', 'step': 6278, 'epoch': 1} {'type': 'loss', 'content': 0.1840106099843979, 'timestamp': '2025-09-10 02:41:23.124385', 'step': 6279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:23.179929', 'step': 6279, 'epoch': 1} {'type': 'loss', 'content': 0.1306186318397522, 'timestamp': '2025-09-10 02:41:23.186198', 'step': 6280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:23.240118', 'step': 6280, 'epoch': 1} {'type': 'loss', 'content': 0.18780724704265594, 'timestamp': '2025-09-10 02:41:23.243735', 'step': 6281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:23.298564', 'step': 6281, 'epoch': 1} {'type': 'loss', 'content': 0.1751689314842224, 'timestamp': '2025-09-10 02:41:23.300698', 'step': 6282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:23.354442', 'step': 6282, 'epoch': 1} {'type': 'loss', 'content': 0.26846885681152344, 'timestamp': '2025-09-10 02:41:23.356347', 'step': 6283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:23.410580', 'step': 6283, 'epoch': 1} {'type': 'loss', 'content': 0.14015048742294312, 'timestamp': '2025-09-10 02:41:23.416763', 'step': 6284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:23.470613', 'step': 6284, 'epoch': 1} {'type': 'loss', 'content': 0.17960213124752045, 'timestamp': '2025-09-10 02:41:23.472871', 'step': 6285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:23.526314', 'step': 6285, 'epoch': 1} {'type': 'loss', 'content': 0.1742236316204071, 'timestamp': '2025-09-10 02:41:23.528427', 'step': 6286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:23.581809', 'step': 6286, 'epoch': 1} {'type': 'loss', 'content': 0.16656318306922913, 'timestamp': '2025-09-10 02:41:23.584044', 'step': 6287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:23.639427', 'step': 6287, 'epoch': 1} {'type': 'loss', 'content': 0.2581596374511719, 'timestamp': '2025-09-10 02:41:23.647002', 'step': 6288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:23.702275', 'step': 6288, 'epoch': 1} {'type': 'loss', 'content': 0.1866302490234375, 'timestamp': '2025-09-10 02:41:23.704480', 'step': 6289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:23.760663', 'step': 6289, 'epoch': 1} {'type': 'loss', 'content': 0.1365625560283661, 'timestamp': '2025-09-10 02:41:23.762810', 'step': 6290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:23.818153', 'step': 6290, 'epoch': 1} {'type': 'loss', 'content': 0.24327674508094788, 'timestamp': '2025-09-10 02:41:23.820314', 'step': 6291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:23.873940', 'step': 6291, 'epoch': 1} {'type': 'loss', 'content': 0.23233187198638916, 'timestamp': '2025-09-10 02:41:23.880203', 'step': 6292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:23.932902', 'step': 6292, 'epoch': 1} {'type': 'loss', 'content': 0.11202166974544525, 'timestamp': '2025-09-10 02:41:23.935013', 'step': 6293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:23.987858', 'step': 6293, 'epoch': 1} {'type': 'loss', 'content': 0.34900784492492676, 'timestamp': '2025-09-10 02:41:23.989852', 'step': 6294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:24.043523', 'step': 6294, 'epoch': 1} {'type': 'loss', 'content': 0.19154883921146393, 'timestamp': '2025-09-10 02:41:24.045623', 'step': 6295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:24.098620', 'step': 6295, 'epoch': 1} {'type': 'loss', 'content': 0.124110646545887, 'timestamp': '2025-09-10 02:41:24.104371', 'step': 6296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:24.156706', 'step': 6296, 'epoch': 1} {'type': 'loss', 'content': 0.15900033712387085, 'timestamp': '2025-09-10 02:41:24.158879', 'step': 6297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:24.211811', 'step': 6297, 'epoch': 1} {'type': 'loss', 'content': 0.16818755865097046, 'timestamp': '2025-09-10 02:41:24.213951', 'step': 6298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:24.267306', 'step': 6298, 'epoch': 1} {'type': 'loss', 'content': 0.17050029337406158, 'timestamp': '2025-09-10 02:41:24.269456', 'step': 6299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:24.323739', 'step': 6299, 'epoch': 1} {'type': 'loss', 'content': 0.21234539151191711, 'timestamp': '2025-09-10 02:41:24.329476', 'step': 6300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:24.381607', 'step': 6300, 'epoch': 1} {'type': 'loss', 'content': 0.21212412416934967, 'timestamp': '2025-09-10 02:41:24.383866', 'step': 6301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:24.436675', 'step': 6301, 'epoch': 1} {'type': 'loss', 'content': 0.14896801114082336, 'timestamp': '2025-09-10 02:41:24.438738', 'step': 6302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:24.492019', 'step': 6302, 'epoch': 1} {'type': 'loss', 'content': 0.12244772166013718, 'timestamp': '2025-09-10 02:41:24.494224', 'step': 6303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:24.547302', 'step': 6303, 'epoch': 1} {'type': 'loss', 'content': 0.15916474163532257, 'timestamp': '2025-09-10 02:41:24.553153', 'step': 6304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:24.607949', 'step': 6304, 'epoch': 1} {'type': 'loss', 'content': 0.1260831356048584, 'timestamp': '2025-09-10 02:41:24.609866', 'step': 6305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:24.662653', 'step': 6305, 'epoch': 1} {'type': 'loss', 'content': 0.1406291127204895, 'timestamp': '2025-09-10 02:41:24.664930', 'step': 6306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:24.717823', 'step': 6306, 'epoch': 1} {'type': 'loss', 'content': 0.19676031172275543, 'timestamp': '2025-09-10 02:41:24.719964', 'step': 6307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:24.773055', 'step': 6307, 'epoch': 1} {'type': 'loss', 'content': 0.08051691949367523, 'timestamp': '2025-09-10 02:41:24.778974', 'step': 6308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:24.831799', 'step': 6308, 'epoch': 1} {'type': 'loss', 'content': 0.11401748657226562, 'timestamp': '2025-09-10 02:41:24.833749', 'step': 6309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:24.887174', 'step': 6309, 'epoch': 1} {'type': 'loss', 'content': 0.14148303866386414, 'timestamp': '2025-09-10 02:41:24.889577', 'step': 6310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:24.942620', 'step': 6310, 'epoch': 1} {'type': 'loss', 'content': 0.14286531507968903, 'timestamp': '2025-09-10 02:41:24.944911', 'step': 6311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:25.000254', 'step': 6311, 'epoch': 1} {'type': 'loss', 'content': 0.0784166008234024, 'timestamp': '2025-09-10 02:41:25.006007', 'step': 6312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:25.058436', 'step': 6312, 'epoch': 1} {'type': 'loss', 'content': 0.1566249281167984, 'timestamp': '2025-09-10 02:41:25.060599', 'step': 6313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:25.113097', 'step': 6313, 'epoch': 1} {'type': 'loss', 'content': 0.13971945643424988, 'timestamp': '2025-09-10 02:41:25.115293', 'step': 6314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:25.168376', 'step': 6314, 'epoch': 1} {'type': 'loss', 'content': 0.26029855012893677, 'timestamp': '2025-09-10 02:41:25.170558', 'step': 6315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:25.223837', 'step': 6315, 'epoch': 1} {'type': 'loss', 'content': 0.18816129863262177, 'timestamp': '2025-09-10 02:41:25.229688', 'step': 6316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:25.283202', 'step': 6316, 'epoch': 1} {'type': 'loss', 'content': 0.20022915303707123, 'timestamp': '2025-09-10 02:41:25.285381', 'step': 6317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:25.338558', 'step': 6317, 'epoch': 1} {'type': 'loss', 'content': 0.31474587321281433, 'timestamp': '2025-09-10 02:41:25.340695', 'step': 6318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:25.393714', 'step': 6318, 'epoch': 1} {'type': 'loss', 'content': 0.2933816611766815, 'timestamp': '2025-09-10 02:41:25.395902', 'step': 6319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:25.449322', 'step': 6319, 'epoch': 1} {'type': 'loss', 'content': 0.1444968730211258, 'timestamp': '2025-09-10 02:41:25.455110', 'step': 6320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:25.507500', 'step': 6320, 'epoch': 1} {'type': 'loss', 'content': 0.17169468104839325, 'timestamp': '2025-09-10 02:41:25.509639', 'step': 6321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:25.562647', 'step': 6321, 'epoch': 1} {'type': 'loss', 'content': 0.21237628161907196, 'timestamp': '2025-09-10 02:41:25.564798', 'step': 6322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:25.617791', 'step': 6322, 'epoch': 1} {'type': 'loss', 'content': 0.1232750341296196, 'timestamp': '2025-09-10 02:41:25.619936', 'step': 6323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:25.672939', 'step': 6323, 'epoch': 1} {'type': 'loss', 'content': 0.12192331999540329, 'timestamp': '2025-09-10 02:41:25.678610', 'step': 6324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:25.731890', 'step': 6324, 'epoch': 1} {'type': 'loss', 'content': 0.14796307682991028, 'timestamp': '2025-09-10 02:41:25.734031', 'step': 6325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:25.788284', 'step': 6325, 'epoch': 1} {'type': 'loss', 'content': 0.1264786273241043, 'timestamp': '2025-09-10 02:41:25.790539', 'step': 6326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:25.843681', 'step': 6326, 'epoch': 1} {'type': 'loss', 'content': 0.09952239692211151, 'timestamp': '2025-09-10 02:41:25.845954', 'step': 6327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:25.899169', 'step': 6327, 'epoch': 1} {'type': 'loss', 'content': 0.15908032655715942, 'timestamp': '2025-09-10 02:41:25.905248', 'step': 6328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:25.959055', 'step': 6328, 'epoch': 1} {'type': 'loss', 'content': 0.1415831595659256, 'timestamp': '2025-09-10 02:41:25.961154', 'step': 6329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:26.014908', 'step': 6329, 'epoch': 1} {'type': 'loss', 'content': 0.14195016026496887, 'timestamp': '2025-09-10 02:41:26.017009', 'step': 6330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:26.070186', 'step': 6330, 'epoch': 1} {'type': 'loss', 'content': 0.14992451667785645, 'timestamp': '2025-09-10 02:41:26.072392', 'step': 6331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:26.125722', 'step': 6331, 'epoch': 1} {'type': 'loss', 'content': 0.12745429575443268, 'timestamp': '2025-09-10 02:41:26.131596', 'step': 6332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:26.184461', 'step': 6332, 'epoch': 1} {'type': 'loss', 'content': 0.11639884114265442, 'timestamp': '2025-09-10 02:41:26.186579', 'step': 6333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:26.239675', 'step': 6333, 'epoch': 1} {'type': 'loss', 'content': 0.13347508013248444, 'timestamp': '2025-09-10 02:41:26.241851', 'step': 6334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:26.295094', 'step': 6334, 'epoch': 1} {'type': 'loss', 'content': 0.1183217242360115, 'timestamp': '2025-09-10 02:41:26.297284', 'step': 6335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:26.350616', 'step': 6335, 'epoch': 1} {'type': 'loss', 'content': 0.19053886830806732, 'timestamp': '2025-09-10 02:41:26.356638', 'step': 6336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:26.409198', 'step': 6336, 'epoch': 1} {'type': 'loss', 'content': 0.13100765645503998, 'timestamp': '2025-09-10 02:41:26.411451', 'step': 6337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:26.465144', 'step': 6337, 'epoch': 1} {'type': 'loss', 'content': 0.16255560517311096, 'timestamp': '2025-09-10 02:41:26.467245', 'step': 6338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:26.520887', 'step': 6338, 'epoch': 1} {'type': 'loss', 'content': 0.20464912056922913, 'timestamp': '2025-09-10 02:41:26.523018', 'step': 6339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:26.576933', 'step': 6339, 'epoch': 1} {'type': 'loss', 'content': 0.13711853325366974, 'timestamp': '2025-09-10 02:41:26.582705', 'step': 6340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:26.635357', 'step': 6340, 'epoch': 1} {'type': 'loss', 'content': 0.1272948682308197, 'timestamp': '2025-09-10 02:41:26.638100', 'step': 6341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:26.691027', 'step': 6341, 'epoch': 1} {'type': 'loss', 'content': 0.20850852131843567, 'timestamp': '2025-09-10 02:41:26.693295', 'step': 6342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:26.747193', 'step': 6342, 'epoch': 1} {'type': 'loss', 'content': 0.2270568162202835, 'timestamp': '2025-09-10 02:41:26.750715', 'step': 6343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:26.806937', 'step': 6343, 'epoch': 1} {'type': 'loss', 'content': 0.1649971455335617, 'timestamp': '2025-09-10 02:41:26.812908', 'step': 6344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:26.866160', 'step': 6344, 'epoch': 1} {'type': 'loss', 'content': 0.22777748107910156, 'timestamp': '2025-09-10 02:41:26.868282', 'step': 6345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:26.922025', 'step': 6345, 'epoch': 1} {'type': 'loss', 'content': 0.11554883420467377, 'timestamp': '2025-09-10 02:41:26.924412', 'step': 6346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:26.978442', 'step': 6346, 'epoch': 1} {'type': 'loss', 'content': 0.12356208264827728, 'timestamp': '2025-09-10 02:41:26.980587', 'step': 6347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:27.035316', 'step': 6347, 'epoch': 1} {'type': 'loss', 'content': 0.13058018684387207, 'timestamp': '2025-09-10 02:41:27.041465', 'step': 6348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:27.095613', 'step': 6348, 'epoch': 1} {'type': 'loss', 'content': 0.17088717222213745, 'timestamp': '2025-09-10 02:41:27.097695', 'step': 6349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:27.152343', 'step': 6349, 'epoch': 1} {'type': 'loss', 'content': 0.17101149260997772, 'timestamp': '2025-09-10 02:41:27.154456', 'step': 6350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:27.208452', 'step': 6350, 'epoch': 1} {'type': 'loss', 'content': 0.16777220368385315, 'timestamp': '2025-09-10 02:41:27.210587', 'step': 6351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:27.266680', 'step': 6351, 'epoch': 1} {'type': 'loss', 'content': 0.15587034821510315, 'timestamp': '2025-09-10 02:41:27.272726', 'step': 6352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:27.326201', 'step': 6352, 'epoch': 1} {'type': 'loss', 'content': 0.14055220782756805, 'timestamp': '2025-09-10 02:41:27.328367', 'step': 6353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:27.381973', 'step': 6353, 'epoch': 1} {'type': 'loss', 'content': 0.1349593698978424, 'timestamp': '2025-09-10 02:41:27.384232', 'step': 6354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:27.437918', 'step': 6354, 'epoch': 1} {'type': 'loss', 'content': 0.21127136051654816, 'timestamp': '2025-09-10 02:41:27.440123', 'step': 6355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:27.494072', 'step': 6355, 'epoch': 1} {'type': 'loss', 'content': 0.149579718708992, 'timestamp': '2025-09-10 02:41:27.500336', 'step': 6356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:27.554477', 'step': 6356, 'epoch': 1} {'type': 'loss', 'content': 0.1502505987882614, 'timestamp': '2025-09-10 02:41:27.556846', 'step': 6357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:27.611303', 'step': 6357, 'epoch': 1} {'type': 'loss', 'content': 0.14256255328655243, 'timestamp': '2025-09-10 02:41:27.613712', 'step': 6358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:27.671687', 'step': 6358, 'epoch': 1} {'type': 'loss', 'content': 0.1351134181022644, 'timestamp': '2025-09-10 02:41:27.673930', 'step': 6359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:27.728239', 'step': 6359, 'epoch': 1} {'type': 'loss', 'content': 0.2702935039997101, 'timestamp': '2025-09-10 02:41:27.734386', 'step': 6360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:27.787964', 'step': 6360, 'epoch': 1} {'type': 'loss', 'content': 0.10197552293539047, 'timestamp': '2025-09-10 02:41:27.790165', 'step': 6361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:27.844229', 'step': 6361, 'epoch': 1} {'type': 'loss', 'content': 0.12184401601552963, 'timestamp': '2025-09-10 02:41:27.846550', 'step': 6362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:27.900738', 'step': 6362, 'epoch': 1} {'type': 'loss', 'content': 0.11583805084228516, 'timestamp': '2025-09-10 02:41:27.902988', 'step': 6363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:27.956889', 'step': 6363, 'epoch': 1} {'type': 'loss', 'content': 0.22590497136116028, 'timestamp': '2025-09-10 02:41:27.963121', 'step': 6364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:28.016672', 'step': 6364, 'epoch': 1} {'type': 'loss', 'content': 0.19101029634475708, 'timestamp': '2025-09-10 02:41:28.018803', 'step': 6365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:28.072974', 'step': 6365, 'epoch': 1} {'type': 'loss', 'content': 0.13319002091884613, 'timestamp': '2025-09-10 02:41:28.075274', 'step': 6366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:28.130200', 'step': 6366, 'epoch': 1} {'type': 'loss', 'content': 0.11530396342277527, 'timestamp': '2025-09-10 02:41:28.132481', 'step': 6367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:28.187091', 'step': 6367, 'epoch': 1} {'type': 'loss', 'content': 0.1389426589012146, 'timestamp': '2025-09-10 02:41:28.193173', 'step': 6368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:28.247057', 'step': 6368, 'epoch': 1} {'type': 'loss', 'content': 0.15591023862361908, 'timestamp': '2025-09-10 02:41:28.249120', 'step': 6369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:28.303479', 'step': 6369, 'epoch': 1} {'type': 'loss', 'content': 0.17119427025318146, 'timestamp': '2025-09-10 02:41:28.305806', 'step': 6370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:28.361191', 'step': 6370, 'epoch': 1} {'type': 'loss', 'content': 0.14876557886600494, 'timestamp': '2025-09-10 02:41:28.363586', 'step': 6371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:28.417821', 'step': 6371, 'epoch': 1} {'type': 'loss', 'content': 0.08768955618143082, 'timestamp': '2025-09-10 02:41:28.424281', 'step': 6372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:28.477452', 'step': 6372, 'epoch': 1} {'type': 'loss', 'content': 0.1578221172094345, 'timestamp': '2025-09-10 02:41:28.479709', 'step': 6373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:28.534058', 'step': 6373, 'epoch': 1} {'type': 'loss', 'content': 0.09995216876268387, 'timestamp': '2025-09-10 02:41:28.536167', 'step': 6374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:28.590368', 'step': 6374, 'epoch': 1} {'type': 'loss', 'content': 0.16970230638980865, 'timestamp': '2025-09-10 02:41:28.592567', 'step': 6375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:28.647584', 'step': 6375, 'epoch': 1} {'type': 'loss', 'content': 0.22768832743167877, 'timestamp': '2025-09-10 02:41:28.654260', 'step': 6376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:28.707952', 'step': 6376, 'epoch': 1} {'type': 'loss', 'content': 0.19874067604541779, 'timestamp': '2025-09-10 02:41:28.710123', 'step': 6377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:28.767281', 'step': 6377, 'epoch': 1} {'type': 'loss', 'content': 0.12478528916835785, 'timestamp': '2025-09-10 02:41:28.769548', 'step': 6378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:28.832802', 'step': 6378, 'epoch': 1} {'type': 'loss', 'content': 0.17082670331001282, 'timestamp': '2025-09-10 02:41:28.834885', 'step': 6379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:28.890047', 'step': 6379, 'epoch': 1} {'type': 'loss', 'content': 0.19406625628471375, 'timestamp': '2025-09-10 02:41:28.897444', 'step': 6380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:28.951677', 'step': 6380, 'epoch': 1} {'type': 'loss', 'content': 0.09556205570697784, 'timestamp': '2025-09-10 02:41:28.953632', 'step': 6381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:29.013723', 'step': 6381, 'epoch': 1} {'type': 'loss', 'content': 0.16069649159908295, 'timestamp': '2025-09-10 02:41:29.015904', 'step': 6382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:29.069993', 'step': 6382, 'epoch': 1} {'type': 'loss', 'content': 0.13792657852172852, 'timestamp': '2025-09-10 02:41:29.072194', 'step': 6383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:29.126200', 'step': 6383, 'epoch': 1} {'type': 'loss', 'content': 0.10641144961118698, 'timestamp': '2025-09-10 02:41:29.132243', 'step': 6384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:29.186072', 'step': 6384, 'epoch': 1} {'type': 'loss', 'content': 0.18812775611877441, 'timestamp': '2025-09-10 02:41:29.190131', 'step': 6385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:29.244695', 'step': 6385, 'epoch': 1} {'type': 'loss', 'content': 0.2127602994441986, 'timestamp': '2025-09-10 02:41:29.246739', 'step': 6386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:29.301565', 'step': 6386, 'epoch': 1} {'type': 'loss', 'content': 0.23064666986465454, 'timestamp': '2025-09-10 02:41:29.303788', 'step': 6387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:29.358456', 'step': 6387, 'epoch': 1} {'type': 'loss', 'content': 0.21651603281497955, 'timestamp': '2025-09-10 02:41:29.364291', 'step': 6388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:29.418414', 'step': 6388, 'epoch': 1} {'type': 'loss', 'content': 0.17237168550491333, 'timestamp': '2025-09-10 02:41:29.420574', 'step': 6389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:29.475584', 'step': 6389, 'epoch': 1} {'type': 'loss', 'content': 0.15898397564888, 'timestamp': '2025-09-10 02:41:29.477760', 'step': 6390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:29.533410', 'step': 6390, 'epoch': 1} {'type': 'loss', 'content': 0.09893786162137985, 'timestamp': '2025-09-10 02:41:29.535687', 'step': 6391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:29.590694', 'step': 6391, 'epoch': 1} {'type': 'loss', 'content': 0.23619744181632996, 'timestamp': '2025-09-10 02:41:29.596805', 'step': 6392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:29.652014', 'step': 6392, 'epoch': 1} {'type': 'loss', 'content': 0.14793872833251953, 'timestamp': '2025-09-10 02:41:29.653979', 'step': 6393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:29.709863', 'step': 6393, 'epoch': 1} {'type': 'loss', 'content': 0.17748652398586273, 'timestamp': '2025-09-10 02:41:29.712042', 'step': 6394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:29.766435', 'step': 6394, 'epoch': 1} {'type': 'loss', 'content': 0.1980219930410385, 'timestamp': '2025-09-10 02:41:29.768639', 'step': 6395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:29.822763', 'step': 6395, 'epoch': 1} {'type': 'loss', 'content': 0.2319708913564682, 'timestamp': '2025-09-10 02:41:29.828713', 'step': 6396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:29.881705', 'step': 6396, 'epoch': 1} {'type': 'loss', 'content': 0.22272410988807678, 'timestamp': '2025-09-10 02:41:29.883840', 'step': 6397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:29.939197', 'step': 6397, 'epoch': 1} {'type': 'loss', 'content': 0.18296030163764954, 'timestamp': '2025-09-10 02:41:29.941118', 'step': 6398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:29.996922', 'step': 6398, 'epoch': 1} {'type': 'loss', 'content': 0.19240018725395203, 'timestamp': '2025-09-10 02:41:29.999259', 'step': 6399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:30.054730', 'step': 6399, 'epoch': 1} {'type': 'loss', 'content': 0.10956035554409027, 'timestamp': '2025-09-10 02:41:30.061332', 'step': 6400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:30.115970', 'step': 6400, 'epoch': 1} {'type': 'loss', 'content': 0.21229998767375946, 'timestamp': '2025-09-10 02:41:30.117857', 'step': 6401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:30.171510', 'step': 6401, 'epoch': 1} {'type': 'loss', 'content': 0.17706139385700226, 'timestamp': '2025-09-10 02:41:30.173447', 'step': 6402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:30.228819', 'step': 6402, 'epoch': 1} {'type': 'loss', 'content': 0.20923101902008057, 'timestamp': '2025-09-10 02:41:30.230873', 'step': 6403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:30.286439', 'step': 6403, 'epoch': 1} {'type': 'loss', 'content': 0.13844266533851624, 'timestamp': '2025-09-10 02:41:30.292508', 'step': 6404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:30.347220', 'step': 6404, 'epoch': 1} {'type': 'loss', 'content': 0.2024812251329422, 'timestamp': '2025-09-10 02:41:30.349150', 'step': 6405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:30.403947', 'step': 6405, 'epoch': 1} {'type': 'loss', 'content': 0.0843738466501236, 'timestamp': '2025-09-10 02:41:30.405894', 'step': 6406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:30.459183', 'step': 6406, 'epoch': 1} {'type': 'loss', 'content': 0.20286431908607483, 'timestamp': '2025-09-10 02:41:30.461297', 'step': 6407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:30.516185', 'step': 6407, 'epoch': 1} {'type': 'loss', 'content': 0.12484350055456161, 'timestamp': '2025-09-10 02:41:30.522116', 'step': 6408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:30.576036', 'step': 6408, 'epoch': 1} {'type': 'loss', 'content': 0.11186674237251282, 'timestamp': '2025-09-10 02:41:30.578151', 'step': 6409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:30.633809', 'step': 6409, 'epoch': 1} {'type': 'loss', 'content': 0.14572644233703613, 'timestamp': '2025-09-10 02:41:30.635927', 'step': 6410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:30.692290', 'step': 6410, 'epoch': 1} {'type': 'loss', 'content': 0.08601076900959015, 'timestamp': '2025-09-10 02:41:30.694302', 'step': 6411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:30.749985', 'step': 6411, 'epoch': 1} {'type': 'loss', 'content': 0.11825805902481079, 'timestamp': '2025-09-10 02:41:30.756042', 'step': 6412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:30.812117', 'step': 6412, 'epoch': 1} {'type': 'loss', 'content': 0.12462425231933594, 'timestamp': '2025-09-10 02:41:30.814051', 'step': 6413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:30.869267', 'step': 6413, 'epoch': 1} {'type': 'loss', 'content': 0.21384313702583313, 'timestamp': '2025-09-10 02:41:30.871331', 'step': 6414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:30.926330', 'step': 6414, 'epoch': 1} {'type': 'loss', 'content': 0.14400720596313477, 'timestamp': '2025-09-10 02:41:30.928468', 'step': 6415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:30.984120', 'step': 6415, 'epoch': 1} {'type': 'loss', 'content': 0.14173446595668793, 'timestamp': '2025-09-10 02:41:30.990146', 'step': 6416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:31.044552', 'step': 6416, 'epoch': 1} {'type': 'loss', 'content': 0.11285103112459183, 'timestamp': '2025-09-10 02:41:31.046496', 'step': 6417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:31.100589', 'step': 6417, 'epoch': 1} {'type': 'loss', 'content': 0.09213656187057495, 'timestamp': '2025-09-10 02:41:31.102786', 'step': 6418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:31.156493', 'step': 6418, 'epoch': 1} {'type': 'loss', 'content': 0.18888457119464874, 'timestamp': '2025-09-10 02:41:31.158452', 'step': 6419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:31.213548', 'step': 6419, 'epoch': 1} {'type': 'loss', 'content': 0.18022426962852478, 'timestamp': '2025-09-10 02:41:31.219522', 'step': 6420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:31.274338', 'step': 6420, 'epoch': 1} {'type': 'loss', 'content': 0.15166524052619934, 'timestamp': '2025-09-10 02:41:31.276362', 'step': 6421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:31.330683', 'step': 6421, 'epoch': 1} {'type': 'loss', 'content': 0.2609543800354004, 'timestamp': '2025-09-10 02:41:31.332620', 'step': 6422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:31.389149', 'step': 6422, 'epoch': 1} {'type': 'loss', 'content': 0.11817144602537155, 'timestamp': '2025-09-10 02:41:31.391050', 'step': 6423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:31.448911', 'step': 6423, 'epoch': 1} {'type': 'loss', 'content': 0.10332699865102768, 'timestamp': '2025-09-10 02:41:31.455026', 'step': 6424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:31.508637', 'step': 6424, 'epoch': 1} {'type': 'loss', 'content': 0.05175476148724556, 'timestamp': '2025-09-10 02:41:31.510678', 'step': 6425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:31.564732', 'step': 6425, 'epoch': 1} {'type': 'loss', 'content': 0.08866265416145325, 'timestamp': '2025-09-10 02:41:31.566748', 'step': 6426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:31.621679', 'step': 6426, 'epoch': 1} {'type': 'loss', 'content': 0.15304897725582123, 'timestamp': '2025-09-10 02:41:31.623721', 'step': 6427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:31.677816', 'step': 6427, 'epoch': 1} {'type': 'loss', 'content': 0.23342415690422058, 'timestamp': '2025-09-10 02:41:31.683886', 'step': 6428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:31.741666', 'step': 6428, 'epoch': 1} {'type': 'loss', 'content': 0.11780396848917007, 'timestamp': '2025-09-10 02:41:31.743772', 'step': 6429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:31.800391', 'step': 6429, 'epoch': 1} {'type': 'loss', 'content': 0.176687553524971, 'timestamp': '2025-09-10 02:41:31.802426', 'step': 6430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:31.856222', 'step': 6430, 'epoch': 1} {'type': 'loss', 'content': 0.15237347781658173, 'timestamp': '2025-09-10 02:41:31.858226', 'step': 6431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:31.911959', 'step': 6431, 'epoch': 1} {'type': 'loss', 'content': 0.18607081472873688, 'timestamp': '2025-09-10 02:41:31.918245', 'step': 6432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:31.975532', 'step': 6432, 'epoch': 1} {'type': 'loss', 'content': 0.15286605060100555, 'timestamp': '2025-09-10 02:41:31.977501', 'step': 6433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:32.032793', 'step': 6433, 'epoch': 1} {'type': 'loss', 'content': 0.10854387283325195, 'timestamp': '2025-09-10 02:41:32.034758', 'step': 6434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:32.089908', 'step': 6434, 'epoch': 1} {'type': 'loss', 'content': 0.1939067840576172, 'timestamp': '2025-09-10 02:41:32.091800', 'step': 6435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:32.147301', 'step': 6435, 'epoch': 1} {'type': 'loss', 'content': 0.15522274374961853, 'timestamp': '2025-09-10 02:41:32.153316', 'step': 6436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:32.210467', 'step': 6436, 'epoch': 1} {'type': 'loss', 'content': 0.2036435455083847, 'timestamp': '2025-09-10 02:41:32.212399', 'step': 6437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:32.265420', 'step': 6437, 'epoch': 1} {'type': 'loss', 'content': 0.12664593756198883, 'timestamp': '2025-09-10 02:41:32.267350', 'step': 6438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:32.320672', 'step': 6438, 'epoch': 1} {'type': 'loss', 'content': 0.1525893360376358, 'timestamp': '2025-09-10 02:41:32.322821', 'step': 6439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:32.377193', 'step': 6439, 'epoch': 1} {'type': 'loss', 'content': 0.19927576184272766, 'timestamp': '2025-09-10 02:41:32.383324', 'step': 6440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:32.437234', 'step': 6440, 'epoch': 1} {'type': 'loss', 'content': 0.173900306224823, 'timestamp': '2025-09-10 02:41:32.439327', 'step': 6441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:32.494077', 'step': 6441, 'epoch': 1} {'type': 'loss', 'content': 0.14271365106105804, 'timestamp': '2025-09-10 02:41:32.496277', 'step': 6442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:32.567051', 'step': 6442, 'epoch': 1} {'type': 'loss', 'content': 0.11637929081916809, 'timestamp': '2025-09-10 02:41:32.569399', 'step': 6443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:32.623170', 'step': 6443, 'epoch': 1} {'type': 'loss', 'content': 0.16202901303768158, 'timestamp': '2025-09-10 02:41:32.629394', 'step': 6444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:32.683645', 'step': 6444, 'epoch': 1} {'type': 'loss', 'content': 0.16779768466949463, 'timestamp': '2025-09-10 02:41:32.686108', 'step': 6445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:32.743211', 'step': 6445, 'epoch': 1} {'type': 'loss', 'content': 0.22862109541893005, 'timestamp': '2025-09-10 02:41:32.745665', 'step': 6446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:32.800280', 'step': 6446, 'epoch': 1} {'type': 'loss', 'content': 0.22408978641033173, 'timestamp': '2025-09-10 02:41:32.802653', 'step': 6447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:32.855810', 'step': 6447, 'epoch': 1} {'type': 'loss', 'content': 0.252702921628952, 'timestamp': '2025-09-10 02:41:32.861589', 'step': 6448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:32.913841', 'step': 6448, 'epoch': 1} {'type': 'loss', 'content': 0.19600725173950195, 'timestamp': '2025-09-10 02:41:32.917019', 'step': 6449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:32.971478', 'step': 6449, 'epoch': 1} {'type': 'loss', 'content': 0.1069633960723877, 'timestamp': '2025-09-10 02:41:32.973416', 'step': 6450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:33.027692', 'step': 6450, 'epoch': 1} {'type': 'loss', 'content': 0.1677856296300888, 'timestamp': '2025-09-10 02:41:33.029607', 'step': 6451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:33.082973', 'step': 6451, 'epoch': 1} {'type': 'loss', 'content': 0.16190050542354584, 'timestamp': '2025-09-10 02:41:33.088637', 'step': 6452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:33.140951', 'step': 6452, 'epoch': 1} {'type': 'loss', 'content': 0.1414453238248825, 'timestamp': '2025-09-10 02:41:33.142847', 'step': 6453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:33.195846', 'step': 6453, 'epoch': 1} {'type': 'loss', 'content': 0.14135758578777313, 'timestamp': '2025-09-10 02:41:33.199022', 'step': 6454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:33.252738', 'step': 6454, 'epoch': 1} {'type': 'loss', 'content': 0.14543312788009644, 'timestamp': '2025-09-10 02:41:33.254663', 'step': 6455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:33.307953', 'step': 6455, 'epoch': 1} {'type': 'loss', 'content': 0.16624833643436432, 'timestamp': '2025-09-10 02:41:33.313657', 'step': 6456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:33.366068', 'step': 6456, 'epoch': 1} {'type': 'loss', 'content': 0.14971818029880524, 'timestamp': '2025-09-10 02:41:33.368190', 'step': 6457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:33.422459', 'step': 6457, 'epoch': 1} {'type': 'loss', 'content': 0.08229625225067139, 'timestamp': '2025-09-10 02:41:33.424545', 'step': 6458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:33.478114', 'step': 6458, 'epoch': 1} {'type': 'loss', 'content': 0.16282473504543304, 'timestamp': '2025-09-10 02:41:33.480059', 'step': 6459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:33.533719', 'step': 6459, 'epoch': 1} {'type': 'loss', 'content': 0.11056093871593475, 'timestamp': '2025-09-10 02:41:33.539457', 'step': 6460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:33.592378', 'step': 6460, 'epoch': 1} {'type': 'loss', 'content': 0.16530947387218475, 'timestamp': '2025-09-10 02:41:33.594448', 'step': 6461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:33.648503', 'step': 6461, 'epoch': 1} {'type': 'loss', 'content': 0.0992593914270401, 'timestamp': '2025-09-10 02:41:33.650649', 'step': 6462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:33.704547', 'step': 6462, 'epoch': 1} {'type': 'loss', 'content': 0.21271225810050964, 'timestamp': '2025-09-10 02:41:33.706757', 'step': 6463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:33.760856', 'step': 6463, 'epoch': 1} {'type': 'loss', 'content': 0.1785118579864502, 'timestamp': '2025-09-10 02:41:33.766568', 'step': 6464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:33.819270', 'step': 6464, 'epoch': 1} {'type': 'loss', 'content': 0.20299649238586426, 'timestamp': '2025-09-10 02:41:33.821199', 'step': 6465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:33.874242', 'step': 6465, 'epoch': 1} {'type': 'loss', 'content': 0.1131756603717804, 'timestamp': '2025-09-10 02:41:33.877538', 'step': 6466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:33.932770', 'step': 6466, 'epoch': 1} {'type': 'loss', 'content': 0.13559269905090332, 'timestamp': '2025-09-10 02:41:33.934855', 'step': 6467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:33.989733', 'step': 6467, 'epoch': 1} {'type': 'loss', 'content': 0.2528266906738281, 'timestamp': '2025-09-10 02:41:33.995819', 'step': 6468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:34.050060', 'step': 6468, 'epoch': 1} {'type': 'loss', 'content': 0.14542226493358612, 'timestamp': '2025-09-10 02:41:34.051964', 'step': 6469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:34.106478', 'step': 6469, 'epoch': 1} {'type': 'loss', 'content': 0.09503151476383209, 'timestamp': '2025-09-10 02:41:34.108423', 'step': 6470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:34.162852', 'step': 6470, 'epoch': 1} {'type': 'loss', 'content': 0.16009943187236786, 'timestamp': '2025-09-10 02:41:34.165044', 'step': 6471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:34.220376', 'step': 6471, 'epoch': 1} {'type': 'loss', 'content': 0.10311857610940933, 'timestamp': '2025-09-10 02:41:34.226822', 'step': 6472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:34.281034', 'step': 6472, 'epoch': 1} {'type': 'loss', 'content': 0.10921956598758698, 'timestamp': '2025-09-10 02:41:34.283456', 'step': 6473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:34.338511', 'step': 6473, 'epoch': 1} {'type': 'loss', 'content': 0.11279863119125366, 'timestamp': '2025-09-10 02:41:34.340491', 'step': 6474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:34.395895', 'step': 6474, 'epoch': 1} {'type': 'loss', 'content': 0.23593811690807343, 'timestamp': '2025-09-10 02:41:34.397862', 'step': 6475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:34.454071', 'step': 6475, 'epoch': 1} {'type': 'loss', 'content': 0.121293805539608, 'timestamp': '2025-09-10 02:41:34.460103', 'step': 6476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:34.516607', 'step': 6476, 'epoch': 1} {'type': 'loss', 'content': 0.14841917157173157, 'timestamp': '2025-09-10 02:41:34.518634', 'step': 6477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:34.573912', 'step': 6477, 'epoch': 1} {'type': 'loss', 'content': 0.17361757159233093, 'timestamp': '2025-09-10 02:41:34.575831', 'step': 6478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:34.631603', 'step': 6478, 'epoch': 1} {'type': 'loss', 'content': 0.20863638818264008, 'timestamp': '2025-09-10 02:41:34.633620', 'step': 6479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:34.687521', 'step': 6479, 'epoch': 1} {'type': 'loss', 'content': 0.1444820910692215, 'timestamp': '2025-09-10 02:41:34.693485', 'step': 6480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:34.746855', 'step': 6480, 'epoch': 1} {'type': 'loss', 'content': 0.17397037148475647, 'timestamp': '2025-09-10 02:41:34.748922', 'step': 6481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:34.803282', 'step': 6481, 'epoch': 1} {'type': 'loss', 'content': 0.14811596274375916, 'timestamp': '2025-09-10 02:41:34.805390', 'step': 6482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:34.862413', 'step': 6482, 'epoch': 1} {'type': 'loss', 'content': 0.16512946784496307, 'timestamp': '2025-09-10 02:41:34.864385', 'step': 6483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:34.917900', 'step': 6483, 'epoch': 1} {'type': 'loss', 'content': 0.11706425249576569, 'timestamp': '2025-09-10 02:41:34.923854', 'step': 6484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:34.976218', 'step': 6484, 'epoch': 1} {'type': 'loss', 'content': 0.1784181147813797, 'timestamp': '2025-09-10 02:41:34.978216', 'step': 6485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:35.031759', 'step': 6485, 'epoch': 1} {'type': 'loss', 'content': 0.13611991703510284, 'timestamp': '2025-09-10 02:41:35.033889', 'step': 6486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:35.087566', 'step': 6486, 'epoch': 1} {'type': 'loss', 'content': 0.16612961888313293, 'timestamp': '2025-09-10 02:41:35.089690', 'step': 6487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:35.145864', 'step': 6487, 'epoch': 1} {'type': 'loss', 'content': 0.12944281101226807, 'timestamp': '2025-09-10 02:41:35.151985', 'step': 6488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:35.205596', 'step': 6488, 'epoch': 1} {'type': 'loss', 'content': 0.1499752700328827, 'timestamp': '2025-09-10 02:41:35.207539', 'step': 6489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:35.260807', 'step': 6489, 'epoch': 1} {'type': 'loss', 'content': 0.15818722546100616, 'timestamp': '2025-09-10 02:41:35.262939', 'step': 6490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:35.316495', 'step': 6490, 'epoch': 1} {'type': 'loss', 'content': 0.12432686984539032, 'timestamp': '2025-09-10 02:41:35.318569', 'step': 6491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:35.371508', 'step': 6491, 'epoch': 1} {'type': 'loss', 'content': 0.1513548642396927, 'timestamp': '2025-09-10 02:41:35.377235', 'step': 6492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:35.429512', 'step': 6492, 'epoch': 1} {'type': 'loss', 'content': 0.20623748004436493, 'timestamp': '2025-09-10 02:41:35.431624', 'step': 6493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:35.485918', 'step': 6493, 'epoch': 1} {'type': 'loss', 'content': 0.15112052857875824, 'timestamp': '2025-09-10 02:41:35.487870', 'step': 6494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:35.541242', 'step': 6494, 'epoch': 1} {'type': 'loss', 'content': 0.16676054894924164, 'timestamp': '2025-09-10 02:41:35.543308', 'step': 6495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:35.596921', 'step': 6495, 'epoch': 1} {'type': 'loss', 'content': 0.14774291217327118, 'timestamp': '2025-09-10 02:41:35.602827', 'step': 6496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:35.658626', 'step': 6496, 'epoch': 1} {'type': 'loss', 'content': 0.1461194008588791, 'timestamp': '2025-09-10 02:41:35.660572', 'step': 6497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:35.713499', 'step': 6497, 'epoch': 1} {'type': 'loss', 'content': 0.15568803250789642, 'timestamp': '2025-09-10 02:41:35.715373', 'step': 6498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:35.769351', 'step': 6498, 'epoch': 1} {'type': 'loss', 'content': 0.198342964053154, 'timestamp': '2025-09-10 02:41:35.771450', 'step': 6499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:35.824411', 'step': 6499, 'epoch': 1} {'type': 'loss', 'content': 0.2705400884151459, 'timestamp': '2025-09-10 02:41:35.830206', 'step': 6500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 6500', 'timestamp': '2025-09-10 02:41:36.249408', 'step': 6500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:36.309450', 'step': 6500, 'epoch': 1} {'type': 'loss', 'content': 0.2252250462770462, 'timestamp': '2025-09-10 02:41:36.311467', 'step': 6501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:36.368841', 'step': 6501, 'epoch': 1} {'type': 'loss', 'content': 0.2003440260887146, 'timestamp': '2025-09-10 02:41:36.372171', 'step': 6502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:36.428405', 'step': 6502, 'epoch': 1} {'type': 'loss', 'content': 0.14381472766399384, 'timestamp': '2025-09-10 02:41:36.430101', 'step': 6503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:36.485185', 'step': 6503, 'epoch': 1} {'type': 'loss', 'content': 0.21832150220870972, 'timestamp': '2025-09-10 02:41:36.491179', 'step': 6504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:36.544847', 'step': 6504, 'epoch': 1} {'type': 'loss', 'content': 0.13921833038330078, 'timestamp': '2025-09-10 02:41:36.546642', 'step': 6505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:36.602140', 'step': 6505, 'epoch': 1} {'type': 'loss', 'content': 0.09025520831346512, 'timestamp': '2025-09-10 02:41:36.604230', 'step': 6506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:36.662074', 'step': 6506, 'epoch': 1} {'type': 'loss', 'content': 0.15789921581745148, 'timestamp': '2025-09-10 02:41:36.664090', 'step': 6507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:36.718930', 'step': 6507, 'epoch': 1} {'type': 'loss', 'content': 0.1624009758234024, 'timestamp': '2025-09-10 02:41:36.724902', 'step': 6508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:36.779256', 'step': 6508, 'epoch': 1} {'type': 'loss', 'content': 0.17729629576206207, 'timestamp': '2025-09-10 02:41:36.781179', 'step': 6509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:36.837400', 'step': 6509, 'epoch': 1} {'type': 'loss', 'content': 0.10833244770765305, 'timestamp': '2025-09-10 02:41:36.839220', 'step': 6510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:36.895460', 'step': 6510, 'epoch': 1} {'type': 'loss', 'content': 0.13826456665992737, 'timestamp': '2025-09-10 02:41:36.897208', 'step': 6511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:36.951438', 'step': 6511, 'epoch': 1} {'type': 'loss', 'content': 0.18196944892406464, 'timestamp': '2025-09-10 02:41:36.957147', 'step': 6512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:37.017513', 'step': 6512, 'epoch': 1} {'type': 'loss', 'content': 0.11386898905038834, 'timestamp': '2025-09-10 02:41:37.019433', 'step': 6513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:37.074643', 'step': 6513, 'epoch': 1} {'type': 'loss', 'content': 0.19459319114685059, 'timestamp': '2025-09-10 02:41:37.076608', 'step': 6514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:37.130867', 'step': 6514, 'epoch': 1} {'type': 'loss', 'content': 0.14451229572296143, 'timestamp': '2025-09-10 02:41:37.132995', 'step': 6515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:37.191446', 'step': 6515, 'epoch': 1} {'type': 'loss', 'content': 0.15681982040405273, 'timestamp': '2025-09-10 02:41:37.197681', 'step': 6516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:37.253797', 'step': 6516, 'epoch': 1} {'type': 'loss', 'content': 0.1023729220032692, 'timestamp': '2025-09-10 02:41:37.255805', 'step': 6517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:37.311100', 'step': 6517, 'epoch': 1} {'type': 'loss', 'content': 0.1497398167848587, 'timestamp': '2025-09-10 02:41:37.312803', 'step': 6518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:37.367775', 'step': 6518, 'epoch': 1} {'type': 'loss', 'content': 0.22488099336624146, 'timestamp': '2025-09-10 02:41:37.369623', 'step': 6519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:37.424349', 'step': 6519, 'epoch': 1} {'type': 'loss', 'content': 0.1198064461350441, 'timestamp': '2025-09-10 02:41:37.430079', 'step': 6520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:37.484502', 'step': 6520, 'epoch': 1} {'type': 'loss', 'content': 0.12537524104118347, 'timestamp': '2025-09-10 02:41:37.486254', 'step': 6521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:37.539874', 'step': 6521, 'epoch': 1} {'type': 'loss', 'content': 0.18458540737628937, 'timestamp': '2025-09-10 02:41:37.541886', 'step': 6522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:37.596508', 'step': 6522, 'epoch': 1} {'type': 'loss', 'content': 0.117497019469738, 'timestamp': '2025-09-10 02:41:37.598515', 'step': 6523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:37.653130', 'step': 6523, 'epoch': 1} {'type': 'loss', 'content': 0.18754123151302338, 'timestamp': '2025-09-10 02:41:37.659201', 'step': 6524, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:41:50.369008', 'step': 6524, 'epoch': 1} {'type': 'pplx', 'content': 13623.26235452983, 'timestamp': '2025-09-10 02:41:50.371796', 'step': 6524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:50.425450', 'step': 6524, 'epoch': 1} {'type': 'loss', 'content': 0.13124606013298035, 'timestamp': '2025-09-10 02:41:50.427728', 'step': 6525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:50.481268', 'step': 6525, 'epoch': 1} {'type': 'loss', 'content': 0.11010503023862839, 'timestamp': '2025-09-10 02:41:50.483296', 'step': 6526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:50.536468', 'step': 6526, 'epoch': 1} {'type': 'loss', 'content': 0.19685178995132446, 'timestamp': '2025-09-10 02:41:50.538462', 'step': 6527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:50.591676', 'step': 6527, 'epoch': 1} {'type': 'loss', 'content': 0.11431508511304855, 'timestamp': '2025-09-10 02:41:50.597743', 'step': 6528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:50.657407', 'step': 6528, 'epoch': 1} {'type': 'loss', 'content': 0.16015838086605072, 'timestamp': '2025-09-10 02:41:50.659585', 'step': 6529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:50.713858', 'step': 6529, 'epoch': 1} {'type': 'loss', 'content': 0.2808293104171753, 'timestamp': '2025-09-10 02:41:50.715877', 'step': 6530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:50.768970', 'step': 6530, 'epoch': 1} {'type': 'loss', 'content': 0.17553837597370148, 'timestamp': '2025-09-10 02:41:50.771089', 'step': 6531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:50.824321', 'step': 6531, 'epoch': 1} {'type': 'loss', 'content': 0.1245620995759964, 'timestamp': '2025-09-10 02:41:50.830253', 'step': 6532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:50.882863', 'step': 6532, 'epoch': 1} {'type': 'loss', 'content': 0.12102000415325165, 'timestamp': '2025-09-10 02:41:50.885225', 'step': 6533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:50.937943', 'step': 6533, 'epoch': 1} {'type': 'loss', 'content': 0.1414053738117218, 'timestamp': '2025-09-10 02:41:50.940110', 'step': 6534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:50.993195', 'step': 6534, 'epoch': 1} {'type': 'loss', 'content': 0.09878065437078476, 'timestamp': '2025-09-10 02:41:50.995334', 'step': 6535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:51.048207', 'step': 6535, 'epoch': 1} {'type': 'loss', 'content': 0.15836867690086365, 'timestamp': '2025-09-10 02:41:51.054322', 'step': 6536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:51.107663', 'step': 6536, 'epoch': 1} {'type': 'loss', 'content': 0.08366236090660095, 'timestamp': '2025-09-10 02:41:51.109766', 'step': 6537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:51.162989', 'step': 6537, 'epoch': 1} {'type': 'loss', 'content': 0.18303726613521576, 'timestamp': '2025-09-10 02:41:51.165312', 'step': 6538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:51.218442', 'step': 6538, 'epoch': 1} {'type': 'loss', 'content': 0.17931081354618073, 'timestamp': '2025-09-10 02:41:51.220674', 'step': 6539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:51.273595', 'step': 6539, 'epoch': 1} {'type': 'loss', 'content': 0.14981135725975037, 'timestamp': '2025-09-10 02:41:51.279484', 'step': 6540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:51.332886', 'step': 6540, 'epoch': 1} {'type': 'loss', 'content': 0.16032035648822784, 'timestamp': '2025-09-10 02:41:51.335041', 'step': 6541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:51.389343', 'step': 6541, 'epoch': 1} {'type': 'loss', 'content': 0.13235509395599365, 'timestamp': '2025-09-10 02:41:51.391712', 'step': 6542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:51.445333', 'step': 6542, 'epoch': 1} {'type': 'loss', 'content': 0.18824730813503265, 'timestamp': '2025-09-10 02:41:51.447532', 'step': 6543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:51.501127', 'step': 6543, 'epoch': 1} {'type': 'loss', 'content': 0.21027202904224396, 'timestamp': '2025-09-10 02:41:51.506946', 'step': 6544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:51.559566', 'step': 6544, 'epoch': 1} {'type': 'loss', 'content': 0.09929803013801575, 'timestamp': '2025-09-10 02:41:51.561854', 'step': 6545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:51.614824', 'step': 6545, 'epoch': 1} {'type': 'loss', 'content': 0.1456330567598343, 'timestamp': '2025-09-10 02:41:51.619307', 'step': 6546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:51.672904', 'step': 6546, 'epoch': 1} {'type': 'loss', 'content': 0.06217566877603531, 'timestamp': '2025-09-10 02:41:51.675231', 'step': 6547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:51.728473', 'step': 6547, 'epoch': 1} {'type': 'loss', 'content': 0.10747376829385757, 'timestamp': '2025-09-10 02:41:51.734562', 'step': 6548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:51.787158', 'step': 6548, 'epoch': 1} {'type': 'loss', 'content': 0.17251801490783691, 'timestamp': '2025-09-10 02:41:51.789450', 'step': 6549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:51.842567', 'step': 6549, 'epoch': 1} {'type': 'loss', 'content': 0.2051699012517929, 'timestamp': '2025-09-10 02:41:51.844833', 'step': 6550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:51.898202', 'step': 6550, 'epoch': 1} {'type': 'loss', 'content': 0.17979247868061066, 'timestamp': '2025-09-10 02:41:51.901740', 'step': 6551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:51.955424', 'step': 6551, 'epoch': 1} {'type': 'loss', 'content': 0.17791680991649628, 'timestamp': '2025-09-10 02:41:51.961523', 'step': 6552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:52.013794', 'step': 6552, 'epoch': 1} {'type': 'loss', 'content': 0.1861146092414856, 'timestamp': '2025-09-10 02:41:52.018512', 'step': 6553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:52.077392', 'step': 6553, 'epoch': 1} {'type': 'loss', 'content': 0.04914495348930359, 'timestamp': '2025-09-10 02:41:52.079637', 'step': 6554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:52.132864', 'step': 6554, 'epoch': 1} {'type': 'loss', 'content': 0.14659538865089417, 'timestamp': '2025-09-10 02:41:52.135187', 'step': 6555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:52.193564', 'step': 6555, 'epoch': 1} {'type': 'loss', 'content': 0.21387606859207153, 'timestamp': '2025-09-10 02:41:52.199557', 'step': 6556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:52.260572', 'step': 6556, 'epoch': 1} {'type': 'loss', 'content': 0.11051705479621887, 'timestamp': '2025-09-10 02:41:52.267588', 'step': 6557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:52.327851', 'step': 6557, 'epoch': 1} {'type': 'loss', 'content': 0.13518838584423065, 'timestamp': '2025-09-10 02:41:52.330124', 'step': 6558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:52.385175', 'step': 6558, 'epoch': 1} {'type': 'loss', 'content': 0.2280137985944748, 'timestamp': '2025-09-10 02:41:52.387471', 'step': 6559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:52.440051', 'step': 6559, 'epoch': 1} {'type': 'loss', 'content': 0.1667119562625885, 'timestamp': '2025-09-10 02:41:52.446331', 'step': 6560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:52.500618', 'step': 6560, 'epoch': 1} {'type': 'loss', 'content': 0.14252817630767822, 'timestamp': '2025-09-10 02:41:52.502746', 'step': 6561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:52.556399', 'step': 6561, 'epoch': 1} {'type': 'loss', 'content': 0.16220779716968536, 'timestamp': '2025-09-10 02:41:52.558730', 'step': 6562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:52.611884', 'step': 6562, 'epoch': 1} {'type': 'loss', 'content': 0.22360014915466309, 'timestamp': '2025-09-10 02:41:52.614293', 'step': 6563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:52.676172', 'step': 6563, 'epoch': 1} {'type': 'loss', 'content': 0.22423586249351501, 'timestamp': '2025-09-10 02:41:52.682495', 'step': 6564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:52.738471', 'step': 6564, 'epoch': 1} {'type': 'loss', 'content': 0.1157444566488266, 'timestamp': '2025-09-10 02:41:52.740788', 'step': 6565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:52.795281', 'step': 6565, 'epoch': 1} {'type': 'loss', 'content': 0.145253986120224, 'timestamp': '2025-09-10 02:41:52.797671', 'step': 6566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:52.851840', 'step': 6566, 'epoch': 1} {'type': 'loss', 'content': 0.13797323405742645, 'timestamp': '2025-09-10 02:41:52.856123', 'step': 6567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:52.913128', 'step': 6567, 'epoch': 1} {'type': 'loss', 'content': 0.13856551051139832, 'timestamp': '2025-09-10 02:41:52.919557', 'step': 6568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:52.972540', 'step': 6568, 'epoch': 1} {'type': 'loss', 'content': 0.1447390913963318, 'timestamp': '2025-09-10 02:41:52.974899', 'step': 6569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:53.028928', 'step': 6569, 'epoch': 1} {'type': 'loss', 'content': 0.2837525010108948, 'timestamp': '2025-09-10 02:41:53.031317', 'step': 6570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:53.085074', 'step': 6570, 'epoch': 1} {'type': 'loss', 'content': 0.14471372961997986, 'timestamp': '2025-09-10 02:41:53.087506', 'step': 6571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:53.140915', 'step': 6571, 'epoch': 1} {'type': 'loss', 'content': 0.1642863154411316, 'timestamp': '2025-09-10 02:41:53.147129', 'step': 6572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:53.200727', 'step': 6572, 'epoch': 1} {'type': 'loss', 'content': 0.10454314202070236, 'timestamp': '2025-09-10 02:41:53.203115', 'step': 6573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:53.258298', 'step': 6573, 'epoch': 1} {'type': 'loss', 'content': 0.11402761936187744, 'timestamp': '2025-09-10 02:41:53.260570', 'step': 6574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:53.314448', 'step': 6574, 'epoch': 1} {'type': 'loss', 'content': 0.1865789145231247, 'timestamp': '2025-09-10 02:41:53.316772', 'step': 6575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:53.370464', 'step': 6575, 'epoch': 1} {'type': 'loss', 'content': 0.11296800523996353, 'timestamp': '2025-09-10 02:41:53.377198', 'step': 6576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:53.430689', 'step': 6576, 'epoch': 1} {'type': 'loss', 'content': 0.11252228915691376, 'timestamp': '2025-09-10 02:41:53.432709', 'step': 6577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:53.486223', 'step': 6577, 'epoch': 1} {'type': 'loss', 'content': 0.15951336920261383, 'timestamp': '2025-09-10 02:41:53.488687', 'step': 6578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:53.542478', 'step': 6578, 'epoch': 1} {'type': 'loss', 'content': 0.13035979866981506, 'timestamp': '2025-09-10 02:41:53.544823', 'step': 6579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:53.599161', 'step': 6579, 'epoch': 1} {'type': 'loss', 'content': 0.2088535726070404, 'timestamp': '2025-09-10 02:41:53.605590', 'step': 6580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:53.661587', 'step': 6580, 'epoch': 1} {'type': 'loss', 'content': 0.18077580630779266, 'timestamp': '2025-09-10 02:41:53.663952', 'step': 6581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:53.717555', 'step': 6581, 'epoch': 1} {'type': 'loss', 'content': 0.13464170694351196, 'timestamp': '2025-09-10 02:41:53.720055', 'step': 6582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:53.774945', 'step': 6582, 'epoch': 1} {'type': 'loss', 'content': 0.12757398188114166, 'timestamp': '2025-09-10 02:41:53.777313', 'step': 6583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:53.831136', 'step': 6583, 'epoch': 1} {'type': 'loss', 'content': 0.2148619145154953, 'timestamp': '2025-09-10 02:41:53.837650', 'step': 6584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:53.890783', 'step': 6584, 'epoch': 1} {'type': 'loss', 'content': 0.24140791594982147, 'timestamp': '2025-09-10 02:41:53.893032', 'step': 6585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:53.946855', 'step': 6585, 'epoch': 1} {'type': 'loss', 'content': 0.09980447590351105, 'timestamp': '2025-09-10 02:41:53.949115', 'step': 6586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:54.002453', 'step': 6586, 'epoch': 1} {'type': 'loss', 'content': 0.14388222992420197, 'timestamp': '2025-09-10 02:41:54.004664', 'step': 6587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:54.057614', 'step': 6587, 'epoch': 1} {'type': 'loss', 'content': 0.16458943486213684, 'timestamp': '2025-09-10 02:41:54.063864', 'step': 6588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:54.116701', 'step': 6588, 'epoch': 1} {'type': 'loss', 'content': 0.1432945281267166, 'timestamp': '2025-09-10 02:41:54.119008', 'step': 6589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:54.174228', 'step': 6589, 'epoch': 1} {'type': 'loss', 'content': 0.1326991468667984, 'timestamp': '2025-09-10 02:41:54.176564', 'step': 6590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:54.231439', 'step': 6590, 'epoch': 1} {'type': 'loss', 'content': 0.10962318629026413, 'timestamp': '2025-09-10 02:41:54.233738', 'step': 6591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:54.287357', 'step': 6591, 'epoch': 1} {'type': 'loss', 'content': 0.248830646276474, 'timestamp': '2025-09-10 02:41:54.293692', 'step': 6592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:54.348046', 'step': 6592, 'epoch': 1} {'type': 'loss', 'content': 0.10976681113243103, 'timestamp': '2025-09-10 02:41:54.350325', 'step': 6593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:54.408999', 'step': 6593, 'epoch': 1} {'type': 'loss', 'content': 0.16582734882831573, 'timestamp': '2025-09-10 02:41:54.411327', 'step': 6594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:54.464276', 'step': 6594, 'epoch': 1} {'type': 'loss', 'content': 0.12902657687664032, 'timestamp': '2025-09-10 02:41:54.466525', 'step': 6595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:54.520962', 'step': 6595, 'epoch': 1} {'type': 'loss', 'content': 0.13725757598876953, 'timestamp': '2025-09-10 02:41:54.527430', 'step': 6596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:54.579980', 'step': 6596, 'epoch': 1} {'type': 'loss', 'content': 0.21131831407546997, 'timestamp': '2025-09-10 02:41:54.582421', 'step': 6597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:54.635499', 'step': 6597, 'epoch': 1} {'type': 'loss', 'content': 0.07284372299909592, 'timestamp': '2025-09-10 02:41:54.637796', 'step': 6598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:54.691095', 'step': 6598, 'epoch': 1} {'type': 'loss', 'content': 0.10052304714918137, 'timestamp': '2025-09-10 02:41:54.693396', 'step': 6599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:54.747083', 'step': 6599, 'epoch': 1} {'type': 'loss', 'content': 0.09046977013349533, 'timestamp': '2025-09-10 02:41:54.753078', 'step': 6600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:54.806655', 'step': 6600, 'epoch': 1} {'type': 'loss', 'content': 0.11553425341844559, 'timestamp': '2025-09-10 02:41:54.808943', 'step': 6601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:54.862391', 'step': 6601, 'epoch': 1} {'type': 'loss', 'content': 0.14687789976596832, 'timestamp': '2025-09-10 02:41:54.864696', 'step': 6602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:54.918027', 'step': 6602, 'epoch': 1} {'type': 'loss', 'content': 0.20586079359054565, 'timestamp': '2025-09-10 02:41:54.920457', 'step': 6603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:54.974174', 'step': 6603, 'epoch': 1} {'type': 'loss', 'content': 0.28341227769851685, 'timestamp': '2025-09-10 02:41:54.980665', 'step': 6604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:55.035183', 'step': 6604, 'epoch': 1} {'type': 'loss', 'content': 0.08374613523483276, 'timestamp': '2025-09-10 02:41:55.037621', 'step': 6605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:55.092988', 'step': 6605, 'epoch': 1} {'type': 'loss', 'content': 0.14022572338581085, 'timestamp': '2025-09-10 02:41:55.095469', 'step': 6606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:55.152074', 'step': 6606, 'epoch': 1} {'type': 'loss', 'content': 0.11857825517654419, 'timestamp': '2025-09-10 02:41:55.154371', 'step': 6607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:55.211006', 'step': 6607, 'epoch': 1} {'type': 'loss', 'content': 0.15969052910804749, 'timestamp': '2025-09-10 02:41:55.217737', 'step': 6608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:55.272634', 'step': 6608, 'epoch': 1} {'type': 'loss', 'content': 0.16744373738765717, 'timestamp': '2025-09-10 02:41:55.275052', 'step': 6609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:55.331509', 'step': 6609, 'epoch': 1} {'type': 'loss', 'content': 0.23923593759536743, 'timestamp': '2025-09-10 02:41:55.334117', 'step': 6610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:55.394225', 'step': 6610, 'epoch': 1} {'type': 'loss', 'content': 0.16958612203598022, 'timestamp': '2025-09-10 02:41:55.396698', 'step': 6611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:55.454733', 'step': 6611, 'epoch': 1} {'type': 'loss', 'content': 0.14919017255306244, 'timestamp': '2025-09-10 02:41:55.461546', 'step': 6612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:55.517068', 'step': 6612, 'epoch': 1} {'type': 'loss', 'content': 0.17862792313098907, 'timestamp': '2025-09-10 02:41:55.519184', 'step': 6613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:55.573145', 'step': 6613, 'epoch': 1} {'type': 'loss', 'content': 0.15177573263645172, 'timestamp': '2025-09-10 02:41:55.575327', 'step': 6614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:55.630271', 'step': 6614, 'epoch': 1} {'type': 'loss', 'content': 0.09568402171134949, 'timestamp': '2025-09-10 02:41:55.632519', 'step': 6615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:55.687054', 'step': 6615, 'epoch': 1} {'type': 'loss', 'content': 0.23598302900791168, 'timestamp': '2025-09-10 02:41:55.692989', 'step': 6616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:55.745833', 'step': 6616, 'epoch': 1} {'type': 'loss', 'content': 0.1563521772623062, 'timestamp': '2025-09-10 02:41:55.748397', 'step': 6617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:55.801680', 'step': 6617, 'epoch': 1} {'type': 'loss', 'content': 0.09667599946260452, 'timestamp': '2025-09-10 02:41:55.804012', 'step': 6618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:55.857049', 'step': 6618, 'epoch': 1} {'type': 'loss', 'content': 0.12928315997123718, 'timestamp': '2025-09-10 02:41:55.859468', 'step': 6619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:55.913005', 'step': 6619, 'epoch': 1} {'type': 'loss', 'content': 0.12256169319152832, 'timestamp': '2025-09-10 02:41:55.919012', 'step': 6620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:55.972045', 'step': 6620, 'epoch': 1} {'type': 'loss', 'content': 0.19827046990394592, 'timestamp': '2025-09-10 02:41:55.974187', 'step': 6621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:56.027427', 'step': 6621, 'epoch': 1} {'type': 'loss', 'content': 0.12212539464235306, 'timestamp': '2025-09-10 02:41:56.029515', 'step': 6622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:56.082743', 'step': 6622, 'epoch': 1} {'type': 'loss', 'content': 0.2074270248413086, 'timestamp': '2025-09-10 02:41:56.084899', 'step': 6623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:56.138698', 'step': 6623, 'epoch': 1} {'type': 'loss', 'content': 0.11581383645534515, 'timestamp': '2025-09-10 02:41:56.144685', 'step': 6624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:56.197191', 'step': 6624, 'epoch': 1} {'type': 'loss', 'content': 0.18889263272285461, 'timestamp': '2025-09-10 02:41:56.199499', 'step': 6625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:56.252445', 'step': 6625, 'epoch': 1} {'type': 'loss', 'content': 0.1669357717037201, 'timestamp': '2025-09-10 02:41:56.254822', 'step': 6626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:56.308393', 'step': 6626, 'epoch': 1} {'type': 'loss', 'content': 0.10060128569602966, 'timestamp': '2025-09-10 02:41:56.310657', 'step': 6627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:56.366281', 'step': 6627, 'epoch': 1} {'type': 'loss', 'content': 0.20050621032714844, 'timestamp': '2025-09-10 02:41:56.373531', 'step': 6628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:56.433333', 'step': 6628, 'epoch': 1} {'type': 'loss', 'content': 0.20869487524032593, 'timestamp': '2025-09-10 02:41:56.436011', 'step': 6629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:56.497398', 'step': 6629, 'epoch': 1} {'type': 'loss', 'content': 0.1221175342798233, 'timestamp': '2025-09-10 02:41:56.499815', 'step': 6630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:56.560675', 'step': 6630, 'epoch': 1} {'type': 'loss', 'content': 0.24811233580112457, 'timestamp': '2025-09-10 02:41:56.563316', 'step': 6631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:56.623217', 'step': 6631, 'epoch': 1} {'type': 'loss', 'content': 0.10850910097360611, 'timestamp': '2025-09-10 02:41:56.630635', 'step': 6632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:56.689691', 'step': 6632, 'epoch': 1} {'type': 'loss', 'content': 0.1398855298757553, 'timestamp': '2025-09-10 02:41:56.692045', 'step': 6633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:56.747184', 'step': 6633, 'epoch': 1} {'type': 'loss', 'content': 0.15637102723121643, 'timestamp': '2025-09-10 02:41:56.749557', 'step': 6634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:56.805296', 'step': 6634, 'epoch': 1} {'type': 'loss', 'content': 0.2479037344455719, 'timestamp': '2025-09-10 02:41:56.807788', 'step': 6635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:56.861239', 'step': 6635, 'epoch': 1} {'type': 'loss', 'content': 0.17360593378543854, 'timestamp': '2025-09-10 02:41:56.867519', 'step': 6636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:56.922448', 'step': 6636, 'epoch': 1} {'type': 'loss', 'content': 0.059438787400722504, 'timestamp': '2025-09-10 02:41:56.924720', 'step': 6637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:56.978447', 'step': 6637, 'epoch': 1} {'type': 'loss', 'content': 0.15280047059059143, 'timestamp': '2025-09-10 02:41:56.980749', 'step': 6638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:57.034291', 'step': 6638, 'epoch': 1} {'type': 'loss', 'content': 0.15018978714942932, 'timestamp': '2025-09-10 02:41:57.036653', 'step': 6639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:57.089613', 'step': 6639, 'epoch': 1} {'type': 'loss', 'content': 0.1408296823501587, 'timestamp': '2025-09-10 02:41:57.095744', 'step': 6640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:57.148670', 'step': 6640, 'epoch': 1} {'type': 'loss', 'content': 0.25368326902389526, 'timestamp': '2025-09-10 02:41:57.150970', 'step': 6641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:57.204083', 'step': 6641, 'epoch': 1} {'type': 'loss', 'content': 0.14502575993537903, 'timestamp': '2025-09-10 02:41:57.206204', 'step': 6642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:57.259300', 'step': 6642, 'epoch': 1} {'type': 'loss', 'content': 0.2696705460548401, 'timestamp': '2025-09-10 02:41:57.261674', 'step': 6643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:57.315856', 'step': 6643, 'epoch': 1} {'type': 'loss', 'content': 0.1771049052476883, 'timestamp': '2025-09-10 02:41:57.321916', 'step': 6644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:57.374640', 'step': 6644, 'epoch': 1} {'type': 'loss', 'content': 0.09050703793764114, 'timestamp': '2025-09-10 02:41:57.376875', 'step': 6645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:57.430195', 'step': 6645, 'epoch': 1} {'type': 'loss', 'content': 0.14109951257705688, 'timestamp': '2025-09-10 02:41:57.432502', 'step': 6646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:57.485296', 'step': 6646, 'epoch': 1} {'type': 'loss', 'content': 0.08584253489971161, 'timestamp': '2025-09-10 02:41:57.487775', 'step': 6647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:57.542226', 'step': 6647, 'epoch': 1} {'type': 'loss', 'content': 0.08914386481046677, 'timestamp': '2025-09-10 02:41:57.548299', 'step': 6648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:57.605767', 'step': 6648, 'epoch': 1} {'type': 'loss', 'content': 0.2527148127555847, 'timestamp': '2025-09-10 02:41:57.608176', 'step': 6649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:57.661707', 'step': 6649, 'epoch': 1} {'type': 'loss', 'content': 0.2553156018257141, 'timestamp': '2025-09-10 02:41:57.664119', 'step': 6650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:57.717846', 'step': 6650, 'epoch': 1} {'type': 'loss', 'content': 0.09376373142004013, 'timestamp': '2025-09-10 02:41:57.720234', 'step': 6651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:57.773352', 'step': 6651, 'epoch': 1} {'type': 'loss', 'content': 0.224832221865654, 'timestamp': '2025-09-10 02:41:57.779299', 'step': 6652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:57.832054', 'step': 6652, 'epoch': 1} {'type': 'loss', 'content': 0.21878546476364136, 'timestamp': '2025-09-10 02:41:57.834460', 'step': 6653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:57.888327', 'step': 6653, 'epoch': 1} {'type': 'loss', 'content': 0.11409884691238403, 'timestamp': '2025-09-10 02:41:57.890743', 'step': 6654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:57.944149', 'step': 6654, 'epoch': 1} {'type': 'loss', 'content': 0.20896807312965393, 'timestamp': '2025-09-10 02:41:57.946628', 'step': 6655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:58.000313', 'step': 6655, 'epoch': 1} {'type': 'loss', 'content': 0.19146975874900818, 'timestamp': '2025-09-10 02:41:58.006205', 'step': 6656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:58.059745', 'step': 6656, 'epoch': 1} {'type': 'loss', 'content': 0.1550890952348709, 'timestamp': '2025-09-10 02:41:58.062027', 'step': 6657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:58.115988', 'step': 6657, 'epoch': 1} {'type': 'loss', 'content': 0.21162272989749908, 'timestamp': '2025-09-10 02:41:58.118331', 'step': 6658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:58.172456', 'step': 6658, 'epoch': 1} {'type': 'loss', 'content': 0.11579222232103348, 'timestamp': '2025-09-10 02:41:58.174883', 'step': 6659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:58.227853', 'step': 6659, 'epoch': 1} {'type': 'loss', 'content': 0.18343636393547058, 'timestamp': '2025-09-10 02:41:58.234412', 'step': 6660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:58.287160', 'step': 6660, 'epoch': 1} {'type': 'loss', 'content': 0.06051427125930786, 'timestamp': '2025-09-10 02:41:58.289458', 'step': 6661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:58.349407', 'step': 6661, 'epoch': 1} {'type': 'loss', 'content': 0.10711360722780228, 'timestamp': '2025-09-10 02:41:58.354435', 'step': 6662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:58.407822', 'step': 6662, 'epoch': 1} {'type': 'loss', 'content': 0.12410982698202133, 'timestamp': '2025-09-10 02:41:58.409973', 'step': 6663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:58.465787', 'step': 6663, 'epoch': 1} {'type': 'loss', 'content': 0.14243507385253906, 'timestamp': '2025-09-10 02:41:58.471833', 'step': 6664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:58.525980', 'step': 6664, 'epoch': 1} {'type': 'loss', 'content': 0.14585961401462555, 'timestamp': '2025-09-10 02:41:58.538122', 'step': 6665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:58.594101', 'step': 6665, 'epoch': 1} {'type': 'loss', 'content': 0.10036526620388031, 'timestamp': '2025-09-10 02:41:58.596446', 'step': 6666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:58.652937', 'step': 6666, 'epoch': 1} {'type': 'loss', 'content': 0.30920228362083435, 'timestamp': '2025-09-10 02:41:58.656622', 'step': 6667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:58.717280', 'step': 6667, 'epoch': 1} {'type': 'loss', 'content': 0.15039733052253723, 'timestamp': '2025-09-10 02:41:58.723499', 'step': 6668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:58.782190', 'step': 6668, 'epoch': 1} {'type': 'loss', 'content': 0.17978127300739288, 'timestamp': '2025-09-10 02:41:58.784652', 'step': 6669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:58.837792', 'step': 6669, 'epoch': 1} {'type': 'loss', 'content': 0.09509832412004471, 'timestamp': '2025-09-10 02:41:58.840035', 'step': 6670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:58.893463', 'step': 6670, 'epoch': 1} {'type': 'loss', 'content': 0.04735076427459717, 'timestamp': '2025-09-10 02:41:58.895931', 'step': 6671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:58.949321', 'step': 6671, 'epoch': 1} {'type': 'loss', 'content': 0.1366102546453476, 'timestamp': '2025-09-10 02:41:58.955323', 'step': 6672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:59.007885', 'step': 6672, 'epoch': 1} {'type': 'loss', 'content': 0.20189931988716125, 'timestamp': '2025-09-10 02:41:59.010169', 'step': 6673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:59.064785', 'step': 6673, 'epoch': 1} {'type': 'loss', 'content': 0.1997174173593521, 'timestamp': '2025-09-10 02:41:59.072322', 'step': 6674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:59.126457', 'step': 6674, 'epoch': 1} {'type': 'loss', 'content': 0.20240776240825653, 'timestamp': '2025-09-10 02:41:59.132046', 'step': 6675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:59.184922', 'step': 6675, 'epoch': 1} {'type': 'loss', 'content': 0.0902872234582901, 'timestamp': '2025-09-10 02:41:59.190961', 'step': 6676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:59.244027', 'step': 6676, 'epoch': 1} {'type': 'loss', 'content': 0.12686306238174438, 'timestamp': '2025-09-10 02:41:59.246819', 'step': 6677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:59.300438', 'step': 6677, 'epoch': 1} {'type': 'loss', 'content': 0.13789504766464233, 'timestamp': '2025-09-10 02:41:59.304725', 'step': 6678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:59.362976', 'step': 6678, 'epoch': 1} {'type': 'loss', 'content': 0.18935704231262207, 'timestamp': '2025-09-10 02:41:59.365307', 'step': 6679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:59.420739', 'step': 6679, 'epoch': 1} {'type': 'loss', 'content': 0.19564636051654816, 'timestamp': '2025-09-10 02:41:59.427282', 'step': 6680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:59.488288', 'step': 6680, 'epoch': 1} {'type': 'loss', 'content': 0.11493170261383057, 'timestamp': '2025-09-10 02:41:59.490582', 'step': 6681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:59.547696', 'step': 6681, 'epoch': 1} {'type': 'loss', 'content': 0.16473127901554108, 'timestamp': '2025-09-10 02:41:59.550121', 'step': 6682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:41:59.618795', 'step': 6682, 'epoch': 1} {'type': 'loss', 'content': 0.22293131053447723, 'timestamp': '2025-09-10 02:41:59.623743', 'step': 6683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:59.701792', 'step': 6683, 'epoch': 1} {'type': 'loss', 'content': 0.16789540648460388, 'timestamp': '2025-09-10 02:41:59.708088', 'step': 6684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:41:59.781322', 'step': 6684, 'epoch': 1} {'type': 'loss', 'content': 0.19052039086818695, 'timestamp': '2025-09-10 02:41:59.783626', 'step': 6685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:59.869298', 'step': 6685, 'epoch': 1} {'type': 'loss', 'content': 0.11172658950090408, 'timestamp': '2025-09-10 02:41:59.871387', 'step': 6686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:59.927302', 'step': 6686, 'epoch': 1} {'type': 'loss', 'content': 0.12221501767635345, 'timestamp': '2025-09-10 02:41:59.929424', 'step': 6687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:41:59.985848', 'step': 6687, 'epoch': 1} {'type': 'loss', 'content': 0.17776770889759064, 'timestamp': '2025-09-10 02:41:59.991990', 'step': 6688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:00.051797', 'step': 6688, 'epoch': 1} {'type': 'loss', 'content': 0.24171113967895508, 'timestamp': '2025-09-10 02:42:00.054085', 'step': 6689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:00.110408', 'step': 6689, 'epoch': 1} {'type': 'loss', 'content': 0.20842839777469635, 'timestamp': '2025-09-10 02:42:00.123911', 'step': 6690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:00.179838', 'step': 6690, 'epoch': 1} {'type': 'loss', 'content': 0.19527849555015564, 'timestamp': '2025-09-10 02:42:00.182143', 'step': 6691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:00.235294', 'step': 6691, 'epoch': 1} {'type': 'loss', 'content': 0.14938756823539734, 'timestamp': '2025-09-10 02:42:00.241206', 'step': 6692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:00.294234', 'step': 6692, 'epoch': 1} {'type': 'loss', 'content': 0.18005307018756866, 'timestamp': '2025-09-10 02:42:00.296616', 'step': 6693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:00.354698', 'step': 6693, 'epoch': 1} {'type': 'loss', 'content': 0.13437725603580475, 'timestamp': '2025-09-10 02:42:00.360400', 'step': 6694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:00.416669', 'step': 6694, 'epoch': 1} {'type': 'loss', 'content': 0.13710568845272064, 'timestamp': '2025-09-10 02:42:00.423456', 'step': 6695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:00.486410', 'step': 6695, 'epoch': 1} {'type': 'loss', 'content': 0.14551132917404175, 'timestamp': '2025-09-10 02:42:00.494127', 'step': 6696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:00.547801', 'step': 6696, 'epoch': 1} {'type': 'loss', 'content': 0.16887986660003662, 'timestamp': '2025-09-10 02:42:00.552413', 'step': 6697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:00.608780', 'step': 6697, 'epoch': 1} {'type': 'loss', 'content': 0.1829378753900528, 'timestamp': '2025-09-10 02:42:00.615895', 'step': 6698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:00.683170', 'step': 6698, 'epoch': 1} {'type': 'loss', 'content': 0.19501926004886627, 'timestamp': '2025-09-10 02:42:00.685609', 'step': 6699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:00.739735', 'step': 6699, 'epoch': 1} {'type': 'loss', 'content': 0.13080160319805145, 'timestamp': '2025-09-10 02:42:00.747199', 'step': 6700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:00.804650', 'step': 6700, 'epoch': 1} {'type': 'loss', 'content': 0.17777466773986816, 'timestamp': '2025-09-10 02:42:00.808620', 'step': 6701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:00.872064', 'step': 6701, 'epoch': 1} {'type': 'loss', 'content': 0.24526408314704895, 'timestamp': '2025-09-10 02:42:00.874337', 'step': 6702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:00.968713', 'step': 6702, 'epoch': 1} {'type': 'loss', 'content': 0.1502249836921692, 'timestamp': '2025-09-10 02:42:00.970796', 'step': 6703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:01.028726', 'step': 6703, 'epoch': 1} {'type': 'loss', 'content': 0.15842939913272858, 'timestamp': '2025-09-10 02:42:01.035010', 'step': 6704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:01.107033', 'step': 6704, 'epoch': 1} {'type': 'loss', 'content': 0.17727507650852203, 'timestamp': '2025-09-10 02:42:01.109227', 'step': 6705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:01.172374', 'step': 6705, 'epoch': 1} {'type': 'loss', 'content': 0.1881546676158905, 'timestamp': '2025-09-10 02:42:01.174500', 'step': 6706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:01.228708', 'step': 6706, 'epoch': 1} {'type': 'loss', 'content': 0.15526209771633148, 'timestamp': '2025-09-10 02:42:01.232794', 'step': 6707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:01.290397', 'step': 6707, 'epoch': 1} {'type': 'loss', 'content': 0.15664057433605194, 'timestamp': '2025-09-10 02:42:01.296181', 'step': 6708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:01.349083', 'step': 6708, 'epoch': 1} {'type': 'loss', 'content': 0.09387321025133133, 'timestamp': '2025-09-10 02:42:01.351665', 'step': 6709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:01.409489', 'step': 6709, 'epoch': 1} {'type': 'loss', 'content': 0.23159675300121307, 'timestamp': '2025-09-10 02:42:01.411642', 'step': 6710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:01.466081', 'step': 6710, 'epoch': 1} {'type': 'loss', 'content': 0.24126970767974854, 'timestamp': '2025-09-10 02:42:01.468741', 'step': 6711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:01.539465', 'step': 6711, 'epoch': 1} {'type': 'loss', 'content': 0.1543745994567871, 'timestamp': '2025-09-10 02:42:01.545539', 'step': 6712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:01.617175', 'step': 6712, 'epoch': 1} {'type': 'loss', 'content': 0.13236738741397858, 'timestamp': '2025-09-10 02:42:01.619572', 'step': 6713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:01.677125', 'step': 6713, 'epoch': 1} {'type': 'loss', 'content': 0.15598192811012268, 'timestamp': '2025-09-10 02:42:01.679901', 'step': 6714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:01.739926', 'step': 6714, 'epoch': 1} {'type': 'loss', 'content': 0.09566949307918549, 'timestamp': '2025-09-10 02:42:01.742698', 'step': 6715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:01.803497', 'step': 6715, 'epoch': 1} {'type': 'loss', 'content': 0.16960322856903076, 'timestamp': '2025-09-10 02:42:01.811118', 'step': 6716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:01.883826', 'step': 6716, 'epoch': 1} {'type': 'loss', 'content': 0.1964457482099533, 'timestamp': '2025-09-10 02:42:01.886122', 'step': 6717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:01.943101', 'step': 6717, 'epoch': 1} {'type': 'loss', 'content': 0.14153240621089935, 'timestamp': '2025-09-10 02:42:01.945648', 'step': 6718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:02.001250', 'step': 6718, 'epoch': 1} {'type': 'loss', 'content': 0.17524589598178864, 'timestamp': '2025-09-10 02:42:02.004800', 'step': 6719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:02.066103', 'step': 6719, 'epoch': 1} {'type': 'loss', 'content': 0.14799410104751587, 'timestamp': '2025-09-10 02:42:02.072082', 'step': 6720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:02.126888', 'step': 6720, 'epoch': 1} {'type': 'loss', 'content': 0.2132880538702011, 'timestamp': '2025-09-10 02:42:02.129288', 'step': 6721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:02.189354', 'step': 6721, 'epoch': 1} {'type': 'loss', 'content': 0.27782079577445984, 'timestamp': '2025-09-10 02:42:02.198034', 'step': 6722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:02.257142', 'step': 6722, 'epoch': 1} {'type': 'loss', 'content': 0.19997817277908325, 'timestamp': '2025-09-10 02:42:02.259391', 'step': 6723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:02.332198', 'step': 6723, 'epoch': 1} {'type': 'loss', 'content': 0.1028364822268486, 'timestamp': '2025-09-10 02:42:02.338262', 'step': 6724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:02.415216', 'step': 6724, 'epoch': 1} {'type': 'loss', 'content': 0.11310269683599472, 'timestamp': '2025-09-10 02:42:02.420754', 'step': 6725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:02.485598', 'step': 6725, 'epoch': 1} {'type': 'loss', 'content': 0.11370736360549927, 'timestamp': '2025-09-10 02:42:02.488314', 'step': 6726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:02.548022', 'step': 6726, 'epoch': 1} {'type': 'loss', 'content': 0.11851823329925537, 'timestamp': '2025-09-10 02:42:02.557303', 'step': 6727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:02.620349', 'step': 6727, 'epoch': 1} {'type': 'loss', 'content': 0.10123658925294876, 'timestamp': '2025-09-10 02:42:02.626643', 'step': 6728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:02.695607', 'step': 6728, 'epoch': 1} {'type': 'loss', 'content': 0.1747177541255951, 'timestamp': '2025-09-10 02:42:02.697980', 'step': 6729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:02.761998', 'step': 6729, 'epoch': 1} {'type': 'loss', 'content': 0.20281778275966644, 'timestamp': '2025-09-10 02:42:02.764759', 'step': 6730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:02.852623', 'step': 6730, 'epoch': 1} {'type': 'loss', 'content': 0.21266157925128937, 'timestamp': '2025-09-10 02:42:02.856086', 'step': 6731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:02.933320', 'step': 6731, 'epoch': 1} {'type': 'loss', 'content': 0.26348721981048584, 'timestamp': '2025-09-10 02:42:02.939447', 'step': 6732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:03.030805', 'step': 6732, 'epoch': 1} {'type': 'loss', 'content': 0.12410072982311249, 'timestamp': '2025-09-10 02:42:03.033274', 'step': 6733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:03.094371', 'step': 6733, 'epoch': 1} {'type': 'loss', 'content': 0.18808415532112122, 'timestamp': '2025-09-10 02:42:03.096908', 'step': 6734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:03.155066', 'step': 6734, 'epoch': 1} {'type': 'loss', 'content': 0.06801502406597137, 'timestamp': '2025-09-10 02:42:03.157523', 'step': 6735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:03.212305', 'step': 6735, 'epoch': 1} {'type': 'loss', 'content': 0.17229798436164856, 'timestamp': '2025-09-10 02:42:03.218813', 'step': 6736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:03.272407', 'step': 6736, 'epoch': 1} {'type': 'loss', 'content': 0.17251652479171753, 'timestamp': '2025-09-10 02:42:03.274927', 'step': 6737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:03.329211', 'step': 6737, 'epoch': 1} {'type': 'loss', 'content': 0.14713990688323975, 'timestamp': '2025-09-10 02:42:03.331548', 'step': 6738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:03.391294', 'step': 6738, 'epoch': 1} {'type': 'loss', 'content': 0.1705782562494278, 'timestamp': '2025-09-10 02:42:03.393731', 'step': 6739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:03.449506', 'step': 6739, 'epoch': 1} {'type': 'loss', 'content': 0.13635484874248505, 'timestamp': '2025-09-10 02:42:03.455709', 'step': 6740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:03.511912', 'step': 6740, 'epoch': 1} {'type': 'loss', 'content': 0.208610400557518, 'timestamp': '2025-09-10 02:42:03.514534', 'step': 6741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:03.571820', 'step': 6741, 'epoch': 1} {'type': 'loss', 'content': 0.13772276043891907, 'timestamp': '2025-09-10 02:42:03.574309', 'step': 6742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:03.629796', 'step': 6742, 'epoch': 1} {'type': 'loss', 'content': 0.13097670674324036, 'timestamp': '2025-09-10 02:42:03.632121', 'step': 6743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:03.691207', 'step': 6743, 'epoch': 1} {'type': 'loss', 'content': 0.13409705460071564, 'timestamp': '2025-09-10 02:42:03.697343', 'step': 6744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:03.759063', 'step': 6744, 'epoch': 1} {'type': 'loss', 'content': 0.12661898136138916, 'timestamp': '2025-09-10 02:42:03.761474', 'step': 6745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:03.820061', 'step': 6745, 'epoch': 1} {'type': 'loss', 'content': 0.13701078295707703, 'timestamp': '2025-09-10 02:42:03.824965', 'step': 6746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:03.879736', 'step': 6746, 'epoch': 1} {'type': 'loss', 'content': 0.1480463445186615, 'timestamp': '2025-09-10 02:42:03.882104', 'step': 6747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:03.935137', 'step': 6747, 'epoch': 1} {'type': 'loss', 'content': 0.1945486068725586, 'timestamp': '2025-09-10 02:42:03.941145', 'step': 6748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:03.993973', 'step': 6748, 'epoch': 1} {'type': 'loss', 'content': 0.116758331656456, 'timestamp': '2025-09-10 02:42:03.996438', 'step': 6749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:04.052612', 'step': 6749, 'epoch': 1} {'type': 'loss', 'content': 0.24420394003391266, 'timestamp': '2025-09-10 02:42:04.054877', 'step': 6750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:04.109000', 'step': 6750, 'epoch': 1} {'type': 'loss', 'content': 0.11331509053707123, 'timestamp': '2025-09-10 02:42:04.111525', 'step': 6751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:04.168056', 'step': 6751, 'epoch': 1} {'type': 'loss', 'content': 0.1806817650794983, 'timestamp': '2025-09-10 02:42:04.175034', 'step': 6752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:04.231101', 'step': 6752, 'epoch': 1} {'type': 'loss', 'content': 0.17681780457496643, 'timestamp': '2025-09-10 02:42:04.233588', 'step': 6753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:04.286122', 'step': 6753, 'epoch': 1} {'type': 'loss', 'content': 0.13708823919296265, 'timestamp': '2025-09-10 02:42:04.288357', 'step': 6754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:04.342277', 'step': 6754, 'epoch': 1} {'type': 'loss', 'content': 0.1635764092206955, 'timestamp': '2025-09-10 02:42:04.344957', 'step': 6755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:04.401849', 'step': 6755, 'epoch': 1} {'type': 'loss', 'content': 0.15670910477638245, 'timestamp': '2025-09-10 02:42:04.408111', 'step': 6756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:04.460981', 'step': 6756, 'epoch': 1} {'type': 'loss', 'content': 0.11889690160751343, 'timestamp': '2025-09-10 02:42:04.463404', 'step': 6757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:04.545176', 'step': 6757, 'epoch': 1} {'type': 'loss', 'content': 0.06047625094652176, 'timestamp': '2025-09-10 02:42:04.548399', 'step': 6758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:04.630947', 'step': 6758, 'epoch': 1} {'type': 'loss', 'content': 0.1455923467874527, 'timestamp': '2025-09-10 02:42:04.633323', 'step': 6759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:04.688566', 'step': 6759, 'epoch': 1} {'type': 'loss', 'content': 0.12382730096578598, 'timestamp': '2025-09-10 02:42:04.694963', 'step': 6760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:04.749243', 'step': 6760, 'epoch': 1} {'type': 'loss', 'content': 0.14257097244262695, 'timestamp': '2025-09-10 02:42:04.751414', 'step': 6761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:04.804582', 'step': 6761, 'epoch': 1} {'type': 'loss', 'content': 0.17534182965755463, 'timestamp': '2025-09-10 02:42:04.806988', 'step': 6762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:04.859989', 'step': 6762, 'epoch': 1} {'type': 'loss', 'content': 0.1244170069694519, 'timestamp': '2025-09-10 02:42:04.867297', 'step': 6763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:04.921101', 'step': 6763, 'epoch': 1} {'type': 'loss', 'content': 0.09929247945547104, 'timestamp': '2025-09-10 02:42:04.927320', 'step': 6764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:04.991479', 'step': 6764, 'epoch': 1} {'type': 'loss', 'content': 0.1385299563407898, 'timestamp': '2025-09-10 02:42:04.993954', 'step': 6765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:05.052394', 'step': 6765, 'epoch': 1} {'type': 'loss', 'content': 0.2482444941997528, 'timestamp': '2025-09-10 02:42:05.054877', 'step': 6766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:05.133527', 'step': 6766, 'epoch': 1} {'type': 'loss', 'content': 0.17245712876319885, 'timestamp': '2025-09-10 02:42:05.135962', 'step': 6767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:05.217256', 'step': 6767, 'epoch': 1} {'type': 'loss', 'content': 0.2098688781261444, 'timestamp': '2025-09-10 02:42:05.223787', 'step': 6768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:05.288335', 'step': 6768, 'epoch': 1} {'type': 'loss', 'content': 0.08905378729104996, 'timestamp': '2025-09-10 02:42:05.290814', 'step': 6769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:05.346308', 'step': 6769, 'epoch': 1} {'type': 'loss', 'content': 0.2302129864692688, 'timestamp': '2025-09-10 02:42:05.348908', 'step': 6770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:05.409401', 'step': 6770, 'epoch': 1} {'type': 'loss', 'content': 0.09573691338300705, 'timestamp': '2025-09-10 02:42:05.412930', 'step': 6771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:05.476314', 'step': 6771, 'epoch': 1} {'type': 'loss', 'content': 0.206278994679451, 'timestamp': '2025-09-10 02:42:05.483671', 'step': 6772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:05.542046', 'step': 6772, 'epoch': 1} {'type': 'loss', 'content': 0.09769055992364883, 'timestamp': '2025-09-10 02:42:05.544520', 'step': 6773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:05.601263', 'step': 6773, 'epoch': 1} {'type': 'loss', 'content': 0.2036229819059372, 'timestamp': '2025-09-10 02:42:05.603707', 'step': 6774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:05.659841', 'step': 6774, 'epoch': 1} {'type': 'loss', 'content': 0.05759080871939659, 'timestamp': '2025-09-10 02:42:05.665450', 'step': 6775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:05.732459', 'step': 6775, 'epoch': 1} {'type': 'loss', 'content': 0.12420881539583206, 'timestamp': '2025-09-10 02:42:05.744235', 'step': 6776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:05.808279', 'step': 6776, 'epoch': 1} {'type': 'loss', 'content': 0.11268895864486694, 'timestamp': '2025-09-10 02:42:05.815432', 'step': 6777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:05.874023', 'step': 6777, 'epoch': 1} {'type': 'loss', 'content': 0.18531294167041779, 'timestamp': '2025-09-10 02:42:05.877584', 'step': 6778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:05.933613', 'step': 6778, 'epoch': 1} {'type': 'loss', 'content': 0.15459373593330383, 'timestamp': '2025-09-10 02:42:05.935768', 'step': 6779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:06.001777', 'step': 6779, 'epoch': 1} {'type': 'loss', 'content': 0.19046418368816376, 'timestamp': '2025-09-10 02:42:06.008015', 'step': 6780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:06.085797', 'step': 6780, 'epoch': 1} {'type': 'loss', 'content': 0.09131980687379837, 'timestamp': '2025-09-10 02:42:06.088059', 'step': 6781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:06.167776', 'step': 6781, 'epoch': 1} {'type': 'loss', 'content': 0.10877883434295654, 'timestamp': '2025-09-10 02:42:06.170290', 'step': 6782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:06.228720', 'step': 6782, 'epoch': 1} {'type': 'loss', 'content': 0.20115548372268677, 'timestamp': '2025-09-10 02:42:06.231126', 'step': 6783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:06.291421', 'step': 6783, 'epoch': 1} {'type': 'loss', 'content': 0.13559484481811523, 'timestamp': '2025-09-10 02:42:06.297584', 'step': 6784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:06.361405', 'step': 6784, 'epoch': 1} {'type': 'loss', 'content': 0.16076357662677765, 'timestamp': '2025-09-10 02:42:06.363943', 'step': 6785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:06.419102', 'step': 6785, 'epoch': 1} {'type': 'loss', 'content': 0.1082996353507042, 'timestamp': '2025-09-10 02:42:06.421325', 'step': 6786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:06.475881', 'step': 6786, 'epoch': 1} {'type': 'loss', 'content': 0.2772039771080017, 'timestamp': '2025-09-10 02:42:06.478089', 'step': 6787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:06.538266', 'step': 6787, 'epoch': 1} {'type': 'loss', 'content': 0.11037108302116394, 'timestamp': '2025-09-10 02:42:06.546155', 'step': 6788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:06.602344', 'step': 6788, 'epoch': 1} {'type': 'loss', 'content': 0.14023642241954803, 'timestamp': '2025-09-10 02:42:06.604759', 'step': 6789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:06.665409', 'step': 6789, 'epoch': 1} {'type': 'loss', 'content': 0.1965513676404953, 'timestamp': '2025-09-10 02:42:06.667735', 'step': 6790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:06.730292', 'step': 6790, 'epoch': 1} {'type': 'loss', 'content': 0.14015913009643555, 'timestamp': '2025-09-10 02:42:06.732799', 'step': 6791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:06.795500', 'step': 6791, 'epoch': 1} {'type': 'loss', 'content': 0.16054019331932068, 'timestamp': '2025-09-10 02:42:06.802968', 'step': 6792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:06.865109', 'step': 6792, 'epoch': 1} {'type': 'loss', 'content': 0.11088636517524719, 'timestamp': '2025-09-10 02:42:06.867575', 'step': 6793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:06.920619', 'step': 6793, 'epoch': 1} {'type': 'loss', 'content': 0.16594654321670532, 'timestamp': '2025-09-10 02:42:06.923096', 'step': 6794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:06.980292', 'step': 6794, 'epoch': 1} {'type': 'loss', 'content': 0.09345106035470963, 'timestamp': '2025-09-10 02:42:06.982799', 'step': 6795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:07.048464', 'step': 6795, 'epoch': 1} {'type': 'loss', 'content': 0.13118702173233032, 'timestamp': '2025-09-10 02:42:07.054581', 'step': 6796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:07.135230', 'step': 6796, 'epoch': 1} {'type': 'loss', 'content': 0.22034911811351776, 'timestamp': '2025-09-10 02:42:07.137623', 'step': 6797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:07.192530', 'step': 6797, 'epoch': 1} {'type': 'loss', 'content': 0.19336006045341492, 'timestamp': '2025-09-10 02:42:07.194798', 'step': 6798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:07.252219', 'step': 6798, 'epoch': 1} {'type': 'loss', 'content': 0.12979461252689362, 'timestamp': '2025-09-10 02:42:07.254670', 'step': 6799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:07.311142', 'step': 6799, 'epoch': 1} {'type': 'loss', 'content': 0.23661617934703827, 'timestamp': '2025-09-10 02:42:07.317316', 'step': 6800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:07.377806', 'step': 6800, 'epoch': 1} {'type': 'loss', 'content': 0.13694490492343903, 'timestamp': '2025-09-10 02:42:07.380010', 'step': 6801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:07.433290', 'step': 6801, 'epoch': 1} {'type': 'loss', 'content': 0.24181219935417175, 'timestamp': '2025-09-10 02:42:07.435441', 'step': 6802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:07.494532', 'step': 6802, 'epoch': 1} {'type': 'loss', 'content': 0.23623885214328766, 'timestamp': '2025-09-10 02:42:07.496948', 'step': 6803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:07.551982', 'step': 6803, 'epoch': 1} {'type': 'loss', 'content': 0.11847130954265594, 'timestamp': '2025-09-10 02:42:07.558012', 'step': 6804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:07.616940', 'step': 6804, 'epoch': 1} {'type': 'loss', 'content': 0.1960952877998352, 'timestamp': '2025-09-10 02:42:07.619314', 'step': 6805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:07.683730', 'step': 6805, 'epoch': 1} {'type': 'loss', 'content': 0.12560021877288818, 'timestamp': '2025-09-10 02:42:07.686153', 'step': 6806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:07.747302', 'step': 6806, 'epoch': 1} {'type': 'loss', 'content': 0.09070804715156555, 'timestamp': '2025-09-10 02:42:07.750988', 'step': 6807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:07.812671', 'step': 6807, 'epoch': 1} {'type': 'loss', 'content': 0.15118645131587982, 'timestamp': '2025-09-10 02:42:07.819007', 'step': 6808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:07.872225', 'step': 6808, 'epoch': 1} {'type': 'loss', 'content': 0.1366281658411026, 'timestamp': '2025-09-10 02:42:07.874738', 'step': 6809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:07.963005', 'step': 6809, 'epoch': 1} {'type': 'loss', 'content': 0.1950281709432602, 'timestamp': '2025-09-10 02:42:07.965428', 'step': 6810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:08.053955', 'step': 6810, 'epoch': 1} {'type': 'loss', 'content': 0.12313063442707062, 'timestamp': '2025-09-10 02:42:08.056391', 'step': 6811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:08.115821', 'step': 6811, 'epoch': 1} {'type': 'loss', 'content': 0.1849481165409088, 'timestamp': '2025-09-10 02:42:08.122925', 'step': 6812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:08.207392', 'step': 6812, 'epoch': 1} {'type': 'loss', 'content': 0.10082803666591644, 'timestamp': '2025-09-10 02:42:08.210045', 'step': 6813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:08.266684', 'step': 6813, 'epoch': 1} {'type': 'loss', 'content': 0.11543850600719452, 'timestamp': '2025-09-10 02:42:08.269223', 'step': 6814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:08.326161', 'step': 6814, 'epoch': 1} {'type': 'loss', 'content': 0.18489153683185577, 'timestamp': '2025-09-10 02:42:08.328639', 'step': 6815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:08.383570', 'step': 6815, 'epoch': 1} {'type': 'loss', 'content': 0.10096882283687592, 'timestamp': '2025-09-10 02:42:08.390004', 'step': 6816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:08.445317', 'step': 6816, 'epoch': 1} {'type': 'loss', 'content': 0.1668395847082138, 'timestamp': '2025-09-10 02:42:08.447681', 'step': 6817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:08.509724', 'step': 6817, 'epoch': 1} {'type': 'loss', 'content': 0.19866521656513214, 'timestamp': '2025-09-10 02:42:08.512759', 'step': 6818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:08.568007', 'step': 6818, 'epoch': 1} {'type': 'loss', 'content': 0.2533068358898163, 'timestamp': '2025-09-10 02:42:08.570397', 'step': 6819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:08.624420', 'step': 6819, 'epoch': 1} {'type': 'loss', 'content': 0.09487976878881454, 'timestamp': '2025-09-10 02:42:08.639153', 'step': 6820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:08.695961', 'step': 6820, 'epoch': 1} {'type': 'loss', 'content': 0.16378962993621826, 'timestamp': '2025-09-10 02:42:08.698428', 'step': 6821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:08.753413', 'step': 6821, 'epoch': 1} {'type': 'loss', 'content': 0.13067825138568878, 'timestamp': '2025-09-10 02:42:08.755797', 'step': 6822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:08.809769', 'step': 6822, 'epoch': 1} {'type': 'loss', 'content': 0.11681989580392838, 'timestamp': '2025-09-10 02:42:08.812449', 'step': 6823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:08.870240', 'step': 6823, 'epoch': 1} {'type': 'loss', 'content': 0.18169160187244415, 'timestamp': '2025-09-10 02:42:08.876542', 'step': 6824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:08.933748', 'step': 6824, 'epoch': 1} {'type': 'loss', 'content': 0.18527677655220032, 'timestamp': '2025-09-10 02:42:08.936129', 'step': 6825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:09.000033', 'step': 6825, 'epoch': 1} {'type': 'loss', 'content': 0.20103508234024048, 'timestamp': '2025-09-10 02:42:09.002467', 'step': 6826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:09.056895', 'step': 6826, 'epoch': 1} {'type': 'loss', 'content': 0.22945068776607513, 'timestamp': '2025-09-10 02:42:09.059541', 'step': 6827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:09.115754', 'step': 6827, 'epoch': 1} {'type': 'loss', 'content': 0.09451218694448471, 'timestamp': '2025-09-10 02:42:09.124760', 'step': 6828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:09.185955', 'step': 6828, 'epoch': 1} {'type': 'loss', 'content': 0.11918900907039642, 'timestamp': '2025-09-10 02:42:09.188433', 'step': 6829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:09.245765', 'step': 6829, 'epoch': 1} {'type': 'loss', 'content': 0.1031751036643982, 'timestamp': '2025-09-10 02:42:09.248188', 'step': 6830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:09.309481', 'step': 6830, 'epoch': 1} {'type': 'loss', 'content': 0.2521836757659912, 'timestamp': '2025-09-10 02:42:09.312323', 'step': 6831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:09.386861', 'step': 6831, 'epoch': 1} {'type': 'loss', 'content': 0.08165362477302551, 'timestamp': '2025-09-10 02:42:09.393185', 'step': 6832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:09.455524', 'step': 6832, 'epoch': 1} {'type': 'loss', 'content': 0.18591655790805817, 'timestamp': '2025-09-10 02:42:09.459359', 'step': 6833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:09.520308', 'step': 6833, 'epoch': 1} {'type': 'loss', 'content': 0.14832933247089386, 'timestamp': '2025-09-10 02:42:09.523232', 'step': 6834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:09.585026', 'step': 6834, 'epoch': 1} {'type': 'loss', 'content': 0.11736347526311874, 'timestamp': '2025-09-10 02:42:09.587729', 'step': 6835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:09.654408', 'step': 6835, 'epoch': 1} {'type': 'loss', 'content': 0.1646309196949005, 'timestamp': '2025-09-10 02:42:09.660390', 'step': 6836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:09.719672', 'step': 6836, 'epoch': 1} {'type': 'loss', 'content': 0.18819209933280945, 'timestamp': '2025-09-10 02:42:09.722016', 'step': 6837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:09.787923', 'step': 6837, 'epoch': 1} {'type': 'loss', 'content': 0.12509869039058685, 'timestamp': '2025-09-10 02:42:09.791651', 'step': 6838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:09.859773', 'step': 6838, 'epoch': 1} {'type': 'loss', 'content': 0.16521261632442474, 'timestamp': '2025-09-10 02:42:09.862008', 'step': 6839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:09.919326', 'step': 6839, 'epoch': 1} {'type': 'loss', 'content': 0.14396977424621582, 'timestamp': '2025-09-10 02:42:09.925402', 'step': 6840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:09.980298', 'step': 6840, 'epoch': 1} {'type': 'loss', 'content': 0.11570626497268677, 'timestamp': '2025-09-10 02:42:09.982839', 'step': 6841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:10.047284', 'step': 6841, 'epoch': 1} {'type': 'loss', 'content': 0.15244987607002258, 'timestamp': '2025-09-10 02:42:10.054358', 'step': 6842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:10.127567', 'step': 6842, 'epoch': 1} {'type': 'loss', 'content': 0.1510663777589798, 'timestamp': '2025-09-10 02:42:10.130136', 'step': 6843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:10.202815', 'step': 6843, 'epoch': 1} {'type': 'loss', 'content': 0.13339728116989136, 'timestamp': '2025-09-10 02:42:10.209203', 'step': 6844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:10.271542', 'step': 6844, 'epoch': 1} {'type': 'loss', 'content': 0.14898361265659332, 'timestamp': '2025-09-10 02:42:10.273955', 'step': 6845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:10.330968', 'step': 6845, 'epoch': 1} {'type': 'loss', 'content': 0.10074805468320847, 'timestamp': '2025-09-10 02:42:10.333330', 'step': 6846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:10.387087', 'step': 6846, 'epoch': 1} {'type': 'loss', 'content': 0.11301953345537186, 'timestamp': '2025-09-10 02:42:10.389659', 'step': 6847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:10.442739', 'step': 6847, 'epoch': 1} {'type': 'loss', 'content': 0.1281580626964569, 'timestamp': '2025-09-10 02:42:10.448769', 'step': 6848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:10.507908', 'step': 6848, 'epoch': 1} {'type': 'loss', 'content': 0.14951181411743164, 'timestamp': '2025-09-10 02:42:10.512806', 'step': 6849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:10.569277', 'step': 6849, 'epoch': 1} {'type': 'loss', 'content': 0.09032775461673737, 'timestamp': '2025-09-10 02:42:10.571758', 'step': 6850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:10.625621', 'step': 6850, 'epoch': 1} {'type': 'loss', 'content': 0.13929526507854462, 'timestamp': '2025-09-10 02:42:10.627883', 'step': 6851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:10.683158', 'step': 6851, 'epoch': 1} {'type': 'loss', 'content': 0.10754550248384476, 'timestamp': '2025-09-10 02:42:10.689321', 'step': 6852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:10.743003', 'step': 6852, 'epoch': 1} {'type': 'loss', 'content': 0.1418967843055725, 'timestamp': '2025-09-10 02:42:10.745384', 'step': 6853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:10.801488', 'step': 6853, 'epoch': 1} {'type': 'loss', 'content': 0.20432710647583008, 'timestamp': '2025-09-10 02:42:10.803771', 'step': 6854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:10.857547', 'step': 6854, 'epoch': 1} {'type': 'loss', 'content': 0.20769353210926056, 'timestamp': '2025-09-10 02:42:10.862009', 'step': 6855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:10.915623', 'step': 6855, 'epoch': 1} {'type': 'loss', 'content': 0.20703214406967163, 'timestamp': '2025-09-10 02:42:10.921860', 'step': 6856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:10.976075', 'step': 6856, 'epoch': 1} {'type': 'loss', 'content': 0.08944021910429001, 'timestamp': '2025-09-10 02:42:10.978614', 'step': 6857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:11.039702', 'step': 6857, 'epoch': 1} {'type': 'loss', 'content': 0.10991338640451431, 'timestamp': '2025-09-10 02:42:11.042035', 'step': 6858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:11.095946', 'step': 6858, 'epoch': 1} {'type': 'loss', 'content': 0.11784148961305618, 'timestamp': '2025-09-10 02:42:11.104002', 'step': 6859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:11.164610', 'step': 6859, 'epoch': 1} {'type': 'loss', 'content': 0.11209363490343094, 'timestamp': '2025-09-10 02:42:11.170683', 'step': 6860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:11.223908', 'step': 6860, 'epoch': 1} {'type': 'loss', 'content': 0.1735392063856125, 'timestamp': '2025-09-10 02:42:11.226150', 'step': 6861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:11.278930', 'step': 6861, 'epoch': 1} {'type': 'loss', 'content': 0.1670813262462616, 'timestamp': '2025-09-10 02:42:11.281281', 'step': 6862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:11.334863', 'step': 6862, 'epoch': 1} {'type': 'loss', 'content': 0.25348272919654846, 'timestamp': '2025-09-10 02:42:11.337732', 'step': 6863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:11.391785', 'step': 6863, 'epoch': 1} {'type': 'loss', 'content': 0.19186460971832275, 'timestamp': '2025-09-10 02:42:11.403028', 'step': 6864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:11.460266', 'step': 6864, 'epoch': 1} {'type': 'loss', 'content': 0.11986794322729111, 'timestamp': '2025-09-10 02:42:11.462882', 'step': 6865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:11.517615', 'step': 6865, 'epoch': 1} {'type': 'loss', 'content': 0.11431898176670074, 'timestamp': '2025-09-10 02:42:11.522855', 'step': 6866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:11.581499', 'step': 6866, 'epoch': 1} {'type': 'loss', 'content': 0.14397084712982178, 'timestamp': '2025-09-10 02:42:11.584998', 'step': 6867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:11.642221', 'step': 6867, 'epoch': 1} {'type': 'loss', 'content': 0.11909538507461548, 'timestamp': '2025-09-10 02:42:11.648375', 'step': 6868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:11.705710', 'step': 6868, 'epoch': 1} {'type': 'loss', 'content': 0.15432226657867432, 'timestamp': '2025-09-10 02:42:11.707988', 'step': 6869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:11.763887', 'step': 6869, 'epoch': 1} {'type': 'loss', 'content': 0.14017710089683533, 'timestamp': '2025-09-10 02:42:11.766432', 'step': 6870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:11.820190', 'step': 6870, 'epoch': 1} {'type': 'loss', 'content': 0.18518322706222534, 'timestamp': '2025-09-10 02:42:11.825452', 'step': 6871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:11.884051', 'step': 6871, 'epoch': 1} {'type': 'loss', 'content': 0.17456789314746857, 'timestamp': '2025-09-10 02:42:11.890913', 'step': 6872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:11.949491', 'step': 6872, 'epoch': 1} {'type': 'loss', 'content': 0.16681738197803497, 'timestamp': '2025-09-10 02:42:11.952663', 'step': 6873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:12.012256', 'step': 6873, 'epoch': 1} {'type': 'loss', 'content': 0.16631212830543518, 'timestamp': '2025-09-10 02:42:12.014589', 'step': 6874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:12.068190', 'step': 6874, 'epoch': 1} {'type': 'loss', 'content': 0.2346847951412201, 'timestamp': '2025-09-10 02:42:12.073200', 'step': 6875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:12.131157', 'step': 6875, 'epoch': 1} {'type': 'loss', 'content': 0.1443643867969513, 'timestamp': '2025-09-10 02:42:12.138280', 'step': 6876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:12.197238', 'step': 6876, 'epoch': 1} {'type': 'loss', 'content': 0.140707328915596, 'timestamp': '2025-09-10 02:42:12.202857', 'step': 6877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:12.260839', 'step': 6877, 'epoch': 1} {'type': 'loss', 'content': 0.20612895488739014, 'timestamp': '2025-09-10 02:42:12.263307', 'step': 6878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:12.317692', 'step': 6878, 'epoch': 1} {'type': 'loss', 'content': 0.1504824161529541, 'timestamp': '2025-09-10 02:42:12.320885', 'step': 6879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:12.378206', 'step': 6879, 'epoch': 1} {'type': 'loss', 'content': 0.12750130891799927, 'timestamp': '2025-09-10 02:42:12.386304', 'step': 6880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:12.440221', 'step': 6880, 'epoch': 1} {'type': 'loss', 'content': 0.15718984603881836, 'timestamp': '2025-09-10 02:42:12.442555', 'step': 6881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:12.495729', 'step': 6881, 'epoch': 1} {'type': 'loss', 'content': 0.3044050335884094, 'timestamp': '2025-09-10 02:42:12.498175', 'step': 6882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:12.560285', 'step': 6882, 'epoch': 1} {'type': 'loss', 'content': 0.1341361403465271, 'timestamp': '2025-09-10 02:42:12.562629', 'step': 6883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:12.617114', 'step': 6883, 'epoch': 1} {'type': 'loss', 'content': 0.1587006151676178, 'timestamp': '2025-09-10 02:42:12.623441', 'step': 6884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:12.677719', 'step': 6884, 'epoch': 1} {'type': 'loss', 'content': 0.13728037476539612, 'timestamp': '2025-09-10 02:42:12.685746', 'step': 6885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:12.745812', 'step': 6885, 'epoch': 1} {'type': 'loss', 'content': 0.1252688616514206, 'timestamp': '2025-09-10 02:42:12.748460', 'step': 6886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:12.801991', 'step': 6886, 'epoch': 1} {'type': 'loss', 'content': 0.13795195519924164, 'timestamp': '2025-09-10 02:42:12.805001', 'step': 6887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:12.859823', 'step': 6887, 'epoch': 1} {'type': 'loss', 'content': 0.1213383823633194, 'timestamp': '2025-09-10 02:42:12.865901', 'step': 6888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:12.918636', 'step': 6888, 'epoch': 1} {'type': 'loss', 'content': 0.10727056115865707, 'timestamp': '2025-09-10 02:42:12.921067', 'step': 6889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:12.974480', 'step': 6889, 'epoch': 1} {'type': 'loss', 'content': 0.1081341952085495, 'timestamp': '2025-09-10 02:42:12.976926', 'step': 6890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:13.030570', 'step': 6890, 'epoch': 1} {'type': 'loss', 'content': 0.15452712774276733, 'timestamp': '2025-09-10 02:42:13.033114', 'step': 6891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:13.086783', 'step': 6891, 'epoch': 1} {'type': 'loss', 'content': 0.16949567198753357, 'timestamp': '2025-09-10 02:42:13.092801', 'step': 6892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:13.145455', 'step': 6892, 'epoch': 1} {'type': 'loss', 'content': 0.22767460346221924, 'timestamp': '2025-09-10 02:42:13.147787', 'step': 6893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:13.202145', 'step': 6893, 'epoch': 1} {'type': 'loss', 'content': 0.09865827858448029, 'timestamp': '2025-09-10 02:42:13.207121', 'step': 6894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:13.265473', 'step': 6894, 'epoch': 1} {'type': 'loss', 'content': 0.16130055487155914, 'timestamp': '2025-09-10 02:42:13.267801', 'step': 6895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:13.321240', 'step': 6895, 'epoch': 1} {'type': 'loss', 'content': 0.2008362114429474, 'timestamp': '2025-09-10 02:42:13.327411', 'step': 6896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:13.380996', 'step': 6896, 'epoch': 1} {'type': 'loss', 'content': 0.11892083287239075, 'timestamp': '2025-09-10 02:42:13.384302', 'step': 6897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:13.440577', 'step': 6897, 'epoch': 1} {'type': 'loss', 'content': 0.17385070025920868, 'timestamp': '2025-09-10 02:42:13.443002', 'step': 6898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:13.498377', 'step': 6898, 'epoch': 1} {'type': 'loss', 'content': 0.22823138535022736, 'timestamp': '2025-09-10 02:42:13.500884', 'step': 6899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:13.567491', 'step': 6899, 'epoch': 1} {'type': 'loss', 'content': 0.16980451345443726, 'timestamp': '2025-09-10 02:42:13.573709', 'step': 6900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:13.626221', 'step': 6900, 'epoch': 1} {'type': 'loss', 'content': 0.12595896422863007, 'timestamp': '2025-09-10 02:42:13.629794', 'step': 6901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:13.685526', 'step': 6901, 'epoch': 1} {'type': 'loss', 'content': 0.29358989000320435, 'timestamp': '2025-09-10 02:42:13.688035', 'step': 6902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:13.741647', 'step': 6902, 'epoch': 1} {'type': 'loss', 'content': 0.21696196496486664, 'timestamp': '2025-09-10 02:42:13.743962', 'step': 6903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:13.798660', 'step': 6903, 'epoch': 1} {'type': 'loss', 'content': 0.12782065570354462, 'timestamp': '2025-09-10 02:42:13.806266', 'step': 6904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:13.865177', 'step': 6904, 'epoch': 1} {'type': 'loss', 'content': 0.19710449874401093, 'timestamp': '2025-09-10 02:42:13.870588', 'step': 6905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:13.927107', 'step': 6905, 'epoch': 1} {'type': 'loss', 'content': 0.2143058478832245, 'timestamp': '2025-09-10 02:42:13.929493', 'step': 6906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:13.983856', 'step': 6906, 'epoch': 1} {'type': 'loss', 'content': 0.1503763198852539, 'timestamp': '2025-09-10 02:42:13.986331', 'step': 6907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:14.049408', 'step': 6907, 'epoch': 1} {'type': 'loss', 'content': 0.10969933867454529, 'timestamp': '2025-09-10 02:42:14.055248', 'step': 6908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:14.108598', 'step': 6908, 'epoch': 1} {'type': 'loss', 'content': 0.13712768256664276, 'timestamp': '2025-09-10 02:42:14.110944', 'step': 6909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:14.166605', 'step': 6909, 'epoch': 1} {'type': 'loss', 'content': 0.19895005226135254, 'timestamp': '2025-09-10 02:42:14.168947', 'step': 6910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:14.230213', 'step': 6910, 'epoch': 1} {'type': 'loss', 'content': 0.1475604921579361, 'timestamp': '2025-09-10 02:42:14.232321', 'step': 6911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:14.291082', 'step': 6911, 'epoch': 1} {'type': 'loss', 'content': 0.11499273031949997, 'timestamp': '2025-09-10 02:42:14.297218', 'step': 6912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:14.350448', 'step': 6912, 'epoch': 1} {'type': 'loss', 'content': 0.16041500866413116, 'timestamp': '2025-09-10 02:42:14.352841', 'step': 6913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:14.406639', 'step': 6913, 'epoch': 1} {'type': 'loss', 'content': 0.14809848368167877, 'timestamp': '2025-09-10 02:42:14.409218', 'step': 6914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:14.463396', 'step': 6914, 'epoch': 1} {'type': 'loss', 'content': 0.20395945012569427, 'timestamp': '2025-09-10 02:42:14.468045', 'step': 6915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:14.523062', 'step': 6915, 'epoch': 1} {'type': 'loss', 'content': 0.11104676127433777, 'timestamp': '2025-09-10 02:42:14.529219', 'step': 6916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:14.582424', 'step': 6916, 'epoch': 1} {'type': 'loss', 'content': 0.10249936580657959, 'timestamp': '2025-09-10 02:42:14.585814', 'step': 6917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:14.640553', 'step': 6917, 'epoch': 1} {'type': 'loss', 'content': 0.0990142896771431, 'timestamp': '2025-09-10 02:42:14.642948', 'step': 6918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:14.700799', 'step': 6918, 'epoch': 1} {'type': 'loss', 'content': 0.13701918721199036, 'timestamp': '2025-09-10 02:42:14.703156', 'step': 6919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:14.756695', 'step': 6919, 'epoch': 1} {'type': 'loss', 'content': 0.10756495594978333, 'timestamp': '2025-09-10 02:42:14.764384', 'step': 6920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:14.819410', 'step': 6920, 'epoch': 1} {'type': 'loss', 'content': 0.11874771863222122, 'timestamp': '2025-09-10 02:42:14.822319', 'step': 6921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:14.877283', 'step': 6921, 'epoch': 1} {'type': 'loss', 'content': 0.13438284397125244, 'timestamp': '2025-09-10 02:42:14.879800', 'step': 6922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:14.935091', 'step': 6922, 'epoch': 1} {'type': 'loss', 'content': 0.16498713195323944, 'timestamp': '2025-09-10 02:42:14.937610', 'step': 6923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:14.997480', 'step': 6923, 'epoch': 1} {'type': 'loss', 'content': 0.12405819445848465, 'timestamp': '2025-09-10 02:42:15.004049', 'step': 6924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:15.056999', 'step': 6924, 'epoch': 1} {'type': 'loss', 'content': 0.13294896483421326, 'timestamp': '2025-09-10 02:42:15.059383', 'step': 6925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:15.122542', 'step': 6925, 'epoch': 1} {'type': 'loss', 'content': 0.1376759111881256, 'timestamp': '2025-09-10 02:42:15.126511', 'step': 6926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:15.188419', 'step': 6926, 'epoch': 1} {'type': 'loss', 'content': 0.17562897503376007, 'timestamp': '2025-09-10 02:42:15.190794', 'step': 6927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:15.252104', 'step': 6927, 'epoch': 1} {'type': 'loss', 'content': 0.1746547967195511, 'timestamp': '2025-09-10 02:42:15.258193', 'step': 6928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:15.312115', 'step': 6928, 'epoch': 1} {'type': 'loss', 'content': 0.15452559292316437, 'timestamp': '2025-09-10 02:42:15.317185', 'step': 6929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:15.371724', 'step': 6929, 'epoch': 1} {'type': 'loss', 'content': 0.19210389256477356, 'timestamp': '2025-09-10 02:42:15.379666', 'step': 6930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:15.437619', 'step': 6930, 'epoch': 1} {'type': 'loss', 'content': 0.12079490721225739, 'timestamp': '2025-09-10 02:42:15.440650', 'step': 6931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:15.496667', 'step': 6931, 'epoch': 1} {'type': 'loss', 'content': 0.06971994787454605, 'timestamp': '2025-09-10 02:42:15.502667', 'step': 6932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:15.557230', 'step': 6932, 'epoch': 1} {'type': 'loss', 'content': 0.0769893229007721, 'timestamp': '2025-09-10 02:42:15.559535', 'step': 6933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:15.613438', 'step': 6933, 'epoch': 1} {'type': 'loss', 'content': 0.13004697859287262, 'timestamp': '2025-09-10 02:42:15.615727', 'step': 6934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:15.669053', 'step': 6934, 'epoch': 1} {'type': 'loss', 'content': 0.12042862921953201, 'timestamp': '2025-09-10 02:42:15.671418', 'step': 6935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:15.725234', 'step': 6935, 'epoch': 1} {'type': 'loss', 'content': 0.1351446658372879, 'timestamp': '2025-09-10 02:42:15.731197', 'step': 6936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:15.788165', 'step': 6936, 'epoch': 1} {'type': 'loss', 'content': 0.11413730680942535, 'timestamp': '2025-09-10 02:42:15.790529', 'step': 6937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:15.849551', 'step': 6937, 'epoch': 1} {'type': 'loss', 'content': 0.10217391699552536, 'timestamp': '2025-09-10 02:42:15.851817', 'step': 6938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:15.906777', 'step': 6938, 'epoch': 1} {'type': 'loss', 'content': 0.1862679421901703, 'timestamp': '2025-09-10 02:42:15.909172', 'step': 6939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:15.962980', 'step': 6939, 'epoch': 1} {'type': 'loss', 'content': 0.18167854845523834, 'timestamp': '2025-09-10 02:42:15.969384', 'step': 6940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:16.022564', 'step': 6940, 'epoch': 1} {'type': 'loss', 'content': 0.18148162961006165, 'timestamp': '2025-09-10 02:42:16.024936', 'step': 6941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:16.078551', 'step': 6941, 'epoch': 1} {'type': 'loss', 'content': 0.17096908390522003, 'timestamp': '2025-09-10 02:42:16.081240', 'step': 6942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:16.135612', 'step': 6942, 'epoch': 1} {'type': 'loss', 'content': 0.15799853205680847, 'timestamp': '2025-09-10 02:42:16.138081', 'step': 6943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:16.193390', 'step': 6943, 'epoch': 1} {'type': 'loss', 'content': 0.10646411776542664, 'timestamp': '2025-09-10 02:42:16.199767', 'step': 6944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:16.253327', 'step': 6944, 'epoch': 1} {'type': 'loss', 'content': 0.1855776607990265, 'timestamp': '2025-09-10 02:42:16.255632', 'step': 6945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:16.309383', 'step': 6945, 'epoch': 1} {'type': 'loss', 'content': 0.12429308891296387, 'timestamp': '2025-09-10 02:42:16.311681', 'step': 6946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:16.365704', 'step': 6946, 'epoch': 1} {'type': 'loss', 'content': 0.13320323824882507, 'timestamp': '2025-09-10 02:42:16.368440', 'step': 6947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:16.423388', 'step': 6947, 'epoch': 1} {'type': 'loss', 'content': 0.11672559380531311, 'timestamp': '2025-09-10 02:42:16.429910', 'step': 6948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:16.483959', 'step': 6948, 'epoch': 1} {'type': 'loss', 'content': 0.1628456562757492, 'timestamp': '2025-09-10 02:42:16.486289', 'step': 6949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:16.541797', 'step': 6949, 'epoch': 1} {'type': 'loss', 'content': 0.2234617918729782, 'timestamp': '2025-09-10 02:42:16.544534', 'step': 6950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:16.599076', 'step': 6950, 'epoch': 1} {'type': 'loss', 'content': 0.14774708449840546, 'timestamp': '2025-09-10 02:42:16.601481', 'step': 6951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:16.656002', 'step': 6951, 'epoch': 1} {'type': 'loss', 'content': 0.12369219213724136, 'timestamp': '2025-09-10 02:42:16.663363', 'step': 6952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:16.717424', 'step': 6952, 'epoch': 1} {'type': 'loss', 'content': 0.16949862241744995, 'timestamp': '2025-09-10 02:42:16.719733', 'step': 6953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:16.773489', 'step': 6953, 'epoch': 1} {'type': 'loss', 'content': 0.11158061027526855, 'timestamp': '2025-09-10 02:42:16.775874', 'step': 6954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:16.830511', 'step': 6954, 'epoch': 1} {'type': 'loss', 'content': 0.20765602588653564, 'timestamp': '2025-09-10 02:42:16.832930', 'step': 6955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:16.888175', 'step': 6955, 'epoch': 1} {'type': 'loss', 'content': 0.14766107499599457, 'timestamp': '2025-09-10 02:42:16.894541', 'step': 6956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:16.949603', 'step': 6956, 'epoch': 1} {'type': 'loss', 'content': 0.2982363998889923, 'timestamp': '2025-09-10 02:42:16.952208', 'step': 6957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:17.010235', 'step': 6957, 'epoch': 1} {'type': 'loss', 'content': 0.15571439266204834, 'timestamp': '2025-09-10 02:42:17.012809', 'step': 6958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:17.067792', 'step': 6958, 'epoch': 1} {'type': 'loss', 'content': 0.19978825747966766, 'timestamp': '2025-09-10 02:42:17.070424', 'step': 6959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:17.126543', 'step': 6959, 'epoch': 1} {'type': 'loss', 'content': 0.16141383349895477, 'timestamp': '2025-09-10 02:42:17.132971', 'step': 6960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:17.188196', 'step': 6960, 'epoch': 1} {'type': 'loss', 'content': 0.17977352440357208, 'timestamp': '2025-09-10 02:42:17.190579', 'step': 6961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:17.245416', 'step': 6961, 'epoch': 1} {'type': 'loss', 'content': 0.15497702360153198, 'timestamp': '2025-09-10 02:42:17.247829', 'step': 6962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:17.302562', 'step': 6962, 'epoch': 1} {'type': 'loss', 'content': 0.13210619986057281, 'timestamp': '2025-09-10 02:42:17.304869', 'step': 6963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:17.359802', 'step': 6963, 'epoch': 1} {'type': 'loss', 'content': 0.22319528460502625, 'timestamp': '2025-09-10 02:42:17.366234', 'step': 6964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:17.421835', 'step': 6964, 'epoch': 1} {'type': 'loss', 'content': 0.16347943246364594, 'timestamp': '2025-09-10 02:42:17.424209', 'step': 6965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:17.479340', 'step': 6965, 'epoch': 1} {'type': 'loss', 'content': 0.10792524367570877, 'timestamp': '2025-09-10 02:42:17.481710', 'step': 6966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:17.537060', 'step': 6966, 'epoch': 1} {'type': 'loss', 'content': 0.12236545979976654, 'timestamp': '2025-09-10 02:42:17.539319', 'step': 6967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:17.593547', 'step': 6967, 'epoch': 1} {'type': 'loss', 'content': 0.18569307029247284, 'timestamp': '2025-09-10 02:42:17.599882', 'step': 6968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:17.654678', 'step': 6968, 'epoch': 1} {'type': 'loss', 'content': 0.0754394456744194, 'timestamp': '2025-09-10 02:42:17.657165', 'step': 6969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:17.714298', 'step': 6969, 'epoch': 1} {'type': 'loss', 'content': 0.13660305738449097, 'timestamp': '2025-09-10 02:42:17.718093', 'step': 6970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:17.773546', 'step': 6970, 'epoch': 1} {'type': 'loss', 'content': 0.1389172226190567, 'timestamp': '2025-09-10 02:42:17.776129', 'step': 6971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:17.830779', 'step': 6971, 'epoch': 1} {'type': 'loss', 'content': 0.13397961854934692, 'timestamp': '2025-09-10 02:42:17.837398', 'step': 6972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:17.891682', 'step': 6972, 'epoch': 1} {'type': 'loss', 'content': 0.3053203225135803, 'timestamp': '2025-09-10 02:42:17.894095', 'step': 6973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:17.949533', 'step': 6973, 'epoch': 1} {'type': 'loss', 'content': 0.16967205703258514, 'timestamp': '2025-09-10 02:42:17.952975', 'step': 6974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:18.007260', 'step': 6974, 'epoch': 1} {'type': 'loss', 'content': 0.17622610926628113, 'timestamp': '2025-09-10 02:42:18.009951', 'step': 6975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:18.066137', 'step': 6975, 'epoch': 1} {'type': 'loss', 'content': 0.16261588037014008, 'timestamp': '2025-09-10 02:42:18.072915', 'step': 6976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:18.126402', 'step': 6976, 'epoch': 1} {'type': 'loss', 'content': 0.207548588514328, 'timestamp': '2025-09-10 02:42:18.128593', 'step': 6977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:18.183999', 'step': 6977, 'epoch': 1} {'type': 'loss', 'content': 0.264394074678421, 'timestamp': '2025-09-10 02:42:18.186303', 'step': 6978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:18.239719', 'step': 6978, 'epoch': 1} {'type': 'loss', 'content': 0.11951339244842529, 'timestamp': '2025-09-10 02:42:18.248470', 'step': 6979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:18.302998', 'step': 6979, 'epoch': 1} {'type': 'loss', 'content': 0.15681453049182892, 'timestamp': '2025-09-10 02:42:18.309252', 'step': 6980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:18.364111', 'step': 6980, 'epoch': 1} {'type': 'loss', 'content': 0.14057116210460663, 'timestamp': '2025-09-10 02:42:18.366377', 'step': 6981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:18.421557', 'step': 6981, 'epoch': 1} {'type': 'loss', 'content': 0.07916796207427979, 'timestamp': '2025-09-10 02:42:18.423677', 'step': 6982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:18.477872', 'step': 6982, 'epoch': 1} {'type': 'loss', 'content': 0.18430788815021515, 'timestamp': '2025-09-10 02:42:18.480089', 'step': 6983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:18.533478', 'step': 6983, 'epoch': 1} {'type': 'loss', 'content': 0.10195986926555634, 'timestamp': '2025-09-10 02:42:18.539608', 'step': 6984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:18.593929', 'step': 6984, 'epoch': 1} {'type': 'loss', 'content': 0.11836938560009003, 'timestamp': '2025-09-10 02:42:18.596153', 'step': 6985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:18.651620', 'step': 6985, 'epoch': 1} {'type': 'loss', 'content': 0.23793019354343414, 'timestamp': '2025-09-10 02:42:18.653737', 'step': 6986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:18.707717', 'step': 6986, 'epoch': 1} {'type': 'loss', 'content': 0.21243278682231903, 'timestamp': '2025-09-10 02:42:18.711141', 'step': 6987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:18.765304', 'step': 6987, 'epoch': 1} {'type': 'loss', 'content': 0.1290336400270462, 'timestamp': '2025-09-10 02:42:18.770938', 'step': 6988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:18.826344', 'step': 6988, 'epoch': 1} {'type': 'loss', 'content': 0.13352511823177338, 'timestamp': '2025-09-10 02:42:18.828511', 'step': 6989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:18.885831', 'step': 6989, 'epoch': 1} {'type': 'loss', 'content': 0.1975243091583252, 'timestamp': '2025-09-10 02:42:18.888386', 'step': 6990, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:42:32.079461', 'step': 6990, 'epoch': 1} {'type': 'pplx', 'content': 11275.207867172385, 'timestamp': '2025-09-10 02:42:32.082941', 'step': 6990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:32.139313', 'step': 6990, 'epoch': 1} {'type': 'loss', 'content': 0.17889223992824554, 'timestamp': '2025-09-10 02:42:32.141379', 'step': 6991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:32.198420', 'step': 6991, 'epoch': 1} {'type': 'loss', 'content': 0.1689504235982895, 'timestamp': '2025-09-10 02:42:32.204716', 'step': 6992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:32.261846', 'step': 6992, 'epoch': 1} {'type': 'loss', 'content': 0.17736785113811493, 'timestamp': '2025-09-10 02:42:32.263879', 'step': 6993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:32.319084', 'step': 6993, 'epoch': 1} {'type': 'loss', 'content': 0.10959769040346146, 'timestamp': '2025-09-10 02:42:32.321034', 'step': 6994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:32.377136', 'step': 6994, 'epoch': 1} {'type': 'loss', 'content': 0.23596270382404327, 'timestamp': '2025-09-10 02:42:32.379432', 'step': 6995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:32.434459', 'step': 6995, 'epoch': 1} {'type': 'loss', 'content': 0.08890374004840851, 'timestamp': '2025-09-10 02:42:32.440754', 'step': 6996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:32.495705', 'step': 6996, 'epoch': 1} {'type': 'loss', 'content': 0.1321156769990921, 'timestamp': '2025-09-10 02:42:32.497942', 'step': 6997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:32.554345', 'step': 6997, 'epoch': 1} {'type': 'loss', 'content': 0.19738301634788513, 'timestamp': '2025-09-10 02:42:32.556486', 'step': 6998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:32.612509', 'step': 6998, 'epoch': 1} {'type': 'loss', 'content': 0.12405627965927124, 'timestamp': '2025-09-10 02:42:32.614616', 'step': 6999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:32.671267', 'step': 6999, 'epoch': 1} {'type': 'loss', 'content': 0.2065829187631607, 'timestamp': '2025-09-10 02:42:32.677447', 'step': 7000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 7000', 'timestamp': '2025-09-10 02:42:33.099137', 'step': 7000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:33.159973', 'step': 7000, 'epoch': 1} {'type': 'loss', 'content': 0.0905870571732521, 'timestamp': '2025-09-10 02:42:33.162299', 'step': 7001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:33.226274', 'step': 7001, 'epoch': 1} {'type': 'loss', 'content': 0.16603323817253113, 'timestamp': '2025-09-10 02:42:33.228306', 'step': 7002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:33.290305', 'step': 7002, 'epoch': 1} {'type': 'loss', 'content': 0.26638132333755493, 'timestamp': '2025-09-10 02:42:33.292473', 'step': 7003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:33.350756', 'step': 7003, 'epoch': 1} {'type': 'loss', 'content': 0.1665058135986328, 'timestamp': '2025-09-10 02:42:33.357047', 'step': 7004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:33.413356', 'step': 7004, 'epoch': 1} {'type': 'loss', 'content': 0.15329821407794952, 'timestamp': '2025-09-10 02:42:33.415303', 'step': 7005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:33.471266', 'step': 7005, 'epoch': 1} {'type': 'loss', 'content': 0.13587182760238647, 'timestamp': '2025-09-10 02:42:33.473308', 'step': 7006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:33.535576', 'step': 7006, 'epoch': 1} {'type': 'loss', 'content': 0.14410018920898438, 'timestamp': '2025-09-10 02:42:33.539542', 'step': 7007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:33.596465', 'step': 7007, 'epoch': 1} {'type': 'loss', 'content': 0.09214625507593155, 'timestamp': '2025-09-10 02:42:33.606624', 'step': 7008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:33.662110', 'step': 7008, 'epoch': 1} {'type': 'loss', 'content': 0.17039301991462708, 'timestamp': '2025-09-10 02:42:33.664281', 'step': 7009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:33.724671', 'step': 7009, 'epoch': 1} {'type': 'loss', 'content': 0.126530259847641, 'timestamp': '2025-09-10 02:42:33.726718', 'step': 7010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:33.783804', 'step': 7010, 'epoch': 1} {'type': 'loss', 'content': 0.12570716440677643, 'timestamp': '2025-09-10 02:42:33.785950', 'step': 7011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:33.843554', 'step': 7011, 'epoch': 1} {'type': 'loss', 'content': 0.15766803920269012, 'timestamp': '2025-09-10 02:42:33.849762', 'step': 7012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:33.904455', 'step': 7012, 'epoch': 1} {'type': 'loss', 'content': 0.22432048618793488, 'timestamp': '2025-09-10 02:42:33.906354', 'step': 7013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:33.962435', 'step': 7013, 'epoch': 1} {'type': 'loss', 'content': 0.23236191272735596, 'timestamp': '2025-09-10 02:42:33.964504', 'step': 7014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:34.019631', 'step': 7014, 'epoch': 1} {'type': 'loss', 'content': 0.175306037068367, 'timestamp': '2025-09-10 02:42:34.021698', 'step': 7015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:34.077016', 'step': 7015, 'epoch': 1} {'type': 'loss', 'content': 0.22606304287910461, 'timestamp': '2025-09-10 02:42:34.083696', 'step': 7016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:34.139931', 'step': 7016, 'epoch': 1} {'type': 'loss', 'content': 0.22556515038013458, 'timestamp': '2025-09-10 02:42:34.141837', 'step': 7017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:34.198256', 'step': 7017, 'epoch': 1} {'type': 'loss', 'content': 0.1991320103406906, 'timestamp': '2025-09-10 02:42:34.200199', 'step': 7018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:34.255064', 'step': 7018, 'epoch': 1} {'type': 'loss', 'content': 0.1809915453195572, 'timestamp': '2025-09-10 02:42:34.256983', 'step': 7019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:34.311996', 'step': 7019, 'epoch': 1} {'type': 'loss', 'content': 0.09750236570835114, 'timestamp': '2025-09-10 02:42:34.318187', 'step': 7020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:34.373510', 'step': 7020, 'epoch': 1} {'type': 'loss', 'content': 0.1910816878080368, 'timestamp': '2025-09-10 02:42:34.375459', 'step': 7021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:34.429715', 'step': 7021, 'epoch': 1} {'type': 'loss', 'content': 0.132771834731102, 'timestamp': '2025-09-10 02:42:34.431737', 'step': 7022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:34.486650', 'step': 7022, 'epoch': 1} {'type': 'loss', 'content': 0.1592222899198532, 'timestamp': '2025-09-10 02:42:34.488749', 'step': 7023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:34.546400', 'step': 7023, 'epoch': 1} {'type': 'loss', 'content': 0.09572503715753555, 'timestamp': '2025-09-10 02:42:34.552548', 'step': 7024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:34.605798', 'step': 7024, 'epoch': 1} {'type': 'loss', 'content': 0.20907318592071533, 'timestamp': '2025-09-10 02:42:34.608024', 'step': 7025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:34.661219', 'step': 7025, 'epoch': 1} {'type': 'loss', 'content': 0.2633351683616638, 'timestamp': '2025-09-10 02:42:34.663365', 'step': 7026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:34.717620', 'step': 7026, 'epoch': 1} {'type': 'loss', 'content': 0.17939868569374084, 'timestamp': '2025-09-10 02:42:34.719444', 'step': 7027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:34.772134', 'step': 7027, 'epoch': 1} {'type': 'loss', 'content': 0.17285294830799103, 'timestamp': '2025-09-10 02:42:34.778136', 'step': 7028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:34.831582', 'step': 7028, 'epoch': 1} {'type': 'loss', 'content': 0.1050669401884079, 'timestamp': '2025-09-10 02:42:34.833716', 'step': 7029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:34.887711', 'step': 7029, 'epoch': 1} {'type': 'loss', 'content': 0.1448807269334793, 'timestamp': '2025-09-10 02:42:34.889819', 'step': 7030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:34.944186', 'step': 7030, 'epoch': 1} {'type': 'loss', 'content': 0.08404015004634857, 'timestamp': '2025-09-10 02:42:34.946402', 'step': 7031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:35.001307', 'step': 7031, 'epoch': 1} {'type': 'loss', 'content': 0.09714734554290771, 'timestamp': '2025-09-10 02:42:35.007692', 'step': 7032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:35.060499', 'step': 7032, 'epoch': 1} {'type': 'loss', 'content': 0.14127863943576813, 'timestamp': '2025-09-10 02:42:35.062648', 'step': 7033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:35.116278', 'step': 7033, 'epoch': 1} {'type': 'loss', 'content': 0.18168413639068604, 'timestamp': '2025-09-10 02:42:35.118544', 'step': 7034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:35.172272', 'step': 7034, 'epoch': 1} {'type': 'loss', 'content': 0.11076638102531433, 'timestamp': '2025-09-10 02:42:35.174473', 'step': 7035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:35.227915', 'step': 7035, 'epoch': 1} {'type': 'loss', 'content': 0.11782673001289368, 'timestamp': '2025-09-10 02:42:35.233952', 'step': 7036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:35.287052', 'step': 7036, 'epoch': 1} {'type': 'loss', 'content': 0.17915193736553192, 'timestamp': '2025-09-10 02:42:35.289181', 'step': 7037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:35.341972', 'step': 7037, 'epoch': 1} {'type': 'loss', 'content': 0.12029485404491425, 'timestamp': '2025-09-10 02:42:35.344259', 'step': 7038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:35.398226', 'step': 7038, 'epoch': 1} {'type': 'loss', 'content': 0.12784889340400696, 'timestamp': '2025-09-10 02:42:35.400582', 'step': 7039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:35.455586', 'step': 7039, 'epoch': 1} {'type': 'loss', 'content': 0.17996348440647125, 'timestamp': '2025-09-10 02:42:35.461599', 'step': 7040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:35.514519', 'step': 7040, 'epoch': 1} {'type': 'loss', 'content': 0.12474759668111801, 'timestamp': '2025-09-10 02:42:35.516755', 'step': 7041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:35.569918', 'step': 7041, 'epoch': 1} {'type': 'loss', 'content': 0.2111952006816864, 'timestamp': '2025-09-10 02:42:35.571848', 'step': 7042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:35.625611', 'step': 7042, 'epoch': 1} {'type': 'loss', 'content': 0.16133354604244232, 'timestamp': '2025-09-10 02:42:35.627947', 'step': 7043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:35.681798', 'step': 7043, 'epoch': 1} {'type': 'loss', 'content': 0.1499117761850357, 'timestamp': '2025-09-10 02:42:35.687934', 'step': 7044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:35.740279', 'step': 7044, 'epoch': 1} {'type': 'loss', 'content': 0.154472216963768, 'timestamp': '2025-09-10 02:42:35.742298', 'step': 7045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:35.796519', 'step': 7045, 'epoch': 1} {'type': 'loss', 'content': 0.11533661186695099, 'timestamp': '2025-09-10 02:42:35.798482', 'step': 7046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:35.852199', 'step': 7046, 'epoch': 1} {'type': 'loss', 'content': 0.2035096138715744, 'timestamp': '2025-09-10 02:42:35.854310', 'step': 7047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:35.908311', 'step': 7047, 'epoch': 1} {'type': 'loss', 'content': 0.2284003645181656, 'timestamp': '2025-09-10 02:42:35.914295', 'step': 7048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:35.970365', 'step': 7048, 'epoch': 1} {'type': 'loss', 'content': 0.13605894148349762, 'timestamp': '2025-09-10 02:42:35.972624', 'step': 7049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:36.026671', 'step': 7049, 'epoch': 1} {'type': 'loss', 'content': 0.12693317234516144, 'timestamp': '2025-09-10 02:42:36.028826', 'step': 7050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:36.082743', 'step': 7050, 'epoch': 1} {'type': 'loss', 'content': 0.20902405679225922, 'timestamp': '2025-09-10 02:42:36.084720', 'step': 7051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:36.138115', 'step': 7051, 'epoch': 1} {'type': 'loss', 'content': 0.20021294057369232, 'timestamp': '2025-09-10 02:42:36.144318', 'step': 7052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:36.197680', 'step': 7052, 'epoch': 1} {'type': 'loss', 'content': 0.17063115537166595, 'timestamp': '2025-09-10 02:42:36.199805', 'step': 7053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:36.253437', 'step': 7053, 'epoch': 1} {'type': 'loss', 'content': 0.16298779845237732, 'timestamp': '2025-09-10 02:42:36.255588', 'step': 7054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:36.309340', 'step': 7054, 'epoch': 1} {'type': 'loss', 'content': 0.1755656599998474, 'timestamp': '2025-09-10 02:42:36.311463', 'step': 7055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:36.366276', 'step': 7055, 'epoch': 1} {'type': 'loss', 'content': 0.09128795564174652, 'timestamp': '2025-09-10 02:42:36.372233', 'step': 7056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:36.424814', 'step': 7056, 'epoch': 1} {'type': 'loss', 'content': 0.18782491981983185, 'timestamp': '2025-09-10 02:42:36.426851', 'step': 7057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:36.480763', 'step': 7057, 'epoch': 1} {'type': 'loss', 'content': 0.20910733938217163, 'timestamp': '2025-09-10 02:42:36.482968', 'step': 7058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:36.537097', 'step': 7058, 'epoch': 1} {'type': 'loss', 'content': 0.2620044946670532, 'timestamp': '2025-09-10 02:42:36.538998', 'step': 7059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:36.592947', 'step': 7059, 'epoch': 1} {'type': 'loss', 'content': 0.3070789575576782, 'timestamp': '2025-09-10 02:42:36.598779', 'step': 7060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:36.651633', 'step': 7060, 'epoch': 1} {'type': 'loss', 'content': 0.1403045952320099, 'timestamp': '2025-09-10 02:42:36.654107', 'step': 7061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:36.707290', 'step': 7061, 'epoch': 1} {'type': 'loss', 'content': 0.2761026620864868, 'timestamp': '2025-09-10 02:42:36.709209', 'step': 7062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:36.764110', 'step': 7062, 'epoch': 1} {'type': 'loss', 'content': 0.2682574689388275, 'timestamp': '2025-09-10 02:42:36.766402', 'step': 7063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:36.820094', 'step': 7063, 'epoch': 1} {'type': 'loss', 'content': 0.202613964676857, 'timestamp': '2025-09-10 02:42:36.825868', 'step': 7064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:36.879207', 'step': 7064, 'epoch': 1} {'type': 'loss', 'content': 0.17382638156414032, 'timestamp': '2025-09-10 02:42:36.881236', 'step': 7065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:36.935270', 'step': 7065, 'epoch': 1} {'type': 'loss', 'content': 0.20932088792324066, 'timestamp': '2025-09-10 02:42:36.937514', 'step': 7066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:36.991619', 'step': 7066, 'epoch': 1} {'type': 'loss', 'content': 0.19880926609039307, 'timestamp': '2025-09-10 02:42:36.994003', 'step': 7067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:37.048977', 'step': 7067, 'epoch': 1} {'type': 'loss', 'content': 0.1335911750793457, 'timestamp': '2025-09-10 02:42:37.055207', 'step': 7068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:37.108688', 'step': 7068, 'epoch': 1} {'type': 'loss', 'content': 0.1578388661146164, 'timestamp': '2025-09-10 02:42:37.110909', 'step': 7069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:37.168830', 'step': 7069, 'epoch': 1} {'type': 'loss', 'content': 0.11635853350162506, 'timestamp': '2025-09-10 02:42:37.171015', 'step': 7070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:37.228313', 'step': 7070, 'epoch': 1} {'type': 'loss', 'content': 0.17143572866916656, 'timestamp': '2025-09-10 02:42:37.230496', 'step': 7071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:37.288597', 'step': 7071, 'epoch': 1} {'type': 'loss', 'content': 0.15780048072338104, 'timestamp': '2025-09-10 02:42:37.294771', 'step': 7072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:37.347607', 'step': 7072, 'epoch': 1} {'type': 'loss', 'content': 0.13430656492710114, 'timestamp': '2025-09-10 02:42:37.349675', 'step': 7073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:37.402801', 'step': 7073, 'epoch': 1} {'type': 'loss', 'content': 0.16265831887722015, 'timestamp': '2025-09-10 02:42:37.405045', 'step': 7074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:37.458457', 'step': 7074, 'epoch': 1} {'type': 'loss', 'content': 0.0968393087387085, 'timestamp': '2025-09-10 02:42:37.460628', 'step': 7075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:37.514655', 'step': 7075, 'epoch': 1} {'type': 'loss', 'content': 0.09405897557735443, 'timestamp': '2025-09-10 02:42:37.520941', 'step': 7076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:37.574651', 'step': 7076, 'epoch': 1} {'type': 'loss', 'content': 0.0946144387125969, 'timestamp': '2025-09-10 02:42:37.576706', 'step': 7077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:37.630878', 'step': 7077, 'epoch': 1} {'type': 'loss', 'content': 0.16742166876792908, 'timestamp': '2025-09-10 02:42:37.633251', 'step': 7078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:37.686965', 'step': 7078, 'epoch': 1} {'type': 'loss', 'content': 0.20699933171272278, 'timestamp': '2025-09-10 02:42:37.689231', 'step': 7079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:37.742505', 'step': 7079, 'epoch': 1} {'type': 'loss', 'content': 0.08860288560390472, 'timestamp': '2025-09-10 02:42:37.748606', 'step': 7080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:37.802012', 'step': 7080, 'epoch': 1} {'type': 'loss', 'content': 0.15778422355651855, 'timestamp': '2025-09-10 02:42:37.804217', 'step': 7081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:37.858068', 'step': 7081, 'epoch': 1} {'type': 'loss', 'content': 0.1639108955860138, 'timestamp': '2025-09-10 02:42:37.860528', 'step': 7082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:37.914760', 'step': 7082, 'epoch': 1} {'type': 'loss', 'content': 0.11008185148239136, 'timestamp': '2025-09-10 02:42:37.916996', 'step': 7083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:37.973226', 'step': 7083, 'epoch': 1} {'type': 'loss', 'content': 0.17589245736598969, 'timestamp': '2025-09-10 02:42:37.979258', 'step': 7084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:38.031841', 'step': 7084, 'epoch': 1} {'type': 'loss', 'content': 0.20376908779144287, 'timestamp': '2025-09-10 02:42:38.034010', 'step': 7085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:38.090127', 'step': 7085, 'epoch': 1} {'type': 'loss', 'content': 0.16936545073986053, 'timestamp': '2025-09-10 02:42:38.092044', 'step': 7086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:38.145868', 'step': 7086, 'epoch': 1} {'type': 'loss', 'content': 0.2056037336587906, 'timestamp': '2025-09-10 02:42:38.148019', 'step': 7087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:38.201825', 'step': 7087, 'epoch': 1} {'type': 'loss', 'content': 0.1472330391407013, 'timestamp': '2025-09-10 02:42:38.208004', 'step': 7088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:38.261179', 'step': 7088, 'epoch': 1} {'type': 'loss', 'content': 0.10353761911392212, 'timestamp': '2025-09-10 02:42:38.263319', 'step': 7089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:38.316711', 'step': 7089, 'epoch': 1} {'type': 'loss', 'content': 0.06387181580066681, 'timestamp': '2025-09-10 02:42:38.318956', 'step': 7090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:38.373398', 'step': 7090, 'epoch': 1} {'type': 'loss', 'content': 0.17910441756248474, 'timestamp': '2025-09-10 02:42:38.375546', 'step': 7091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:38.429259', 'step': 7091, 'epoch': 1} {'type': 'loss', 'content': 0.11216920614242554, 'timestamp': '2025-09-10 02:42:38.435339', 'step': 7092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:38.487842', 'step': 7092, 'epoch': 1} {'type': 'loss', 'content': 0.1009736880660057, 'timestamp': '2025-09-10 02:42:38.490061', 'step': 7093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:38.543355', 'step': 7093, 'epoch': 1} {'type': 'loss', 'content': 0.1217859610915184, 'timestamp': '2025-09-10 02:42:38.545468', 'step': 7094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:38.600351', 'step': 7094, 'epoch': 1} {'type': 'loss', 'content': 0.22134104371070862, 'timestamp': '2025-09-10 02:42:38.602582', 'step': 7095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:38.656356', 'step': 7095, 'epoch': 1} {'type': 'loss', 'content': 0.12359432131052017, 'timestamp': '2025-09-10 02:42:38.662669', 'step': 7096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:38.715401', 'step': 7096, 'epoch': 1} {'type': 'loss', 'content': 0.1460844874382019, 'timestamp': '2025-09-10 02:42:38.717864', 'step': 7097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:38.770585', 'step': 7097, 'epoch': 1} {'type': 'loss', 'content': 0.19906993210315704, 'timestamp': '2025-09-10 02:42:38.772697', 'step': 7098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:38.825939', 'step': 7098, 'epoch': 1} {'type': 'loss', 'content': 0.1207386776804924, 'timestamp': '2025-09-10 02:42:38.827990', 'step': 7099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:38.881413', 'step': 7099, 'epoch': 1} {'type': 'loss', 'content': 0.18147750198841095, 'timestamp': '2025-09-10 02:42:38.887520', 'step': 7100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:38.941476', 'step': 7100, 'epoch': 1} {'type': 'loss', 'content': 0.17412763833999634, 'timestamp': '2025-09-10 02:42:38.943647', 'step': 7101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:38.996735', 'step': 7101, 'epoch': 1} {'type': 'loss', 'content': 0.2112438827753067, 'timestamp': '2025-09-10 02:42:38.998698', 'step': 7102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:39.052399', 'step': 7102, 'epoch': 1} {'type': 'loss', 'content': 0.14433583617210388, 'timestamp': '2025-09-10 02:42:39.054516', 'step': 7103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:39.109219', 'step': 7103, 'epoch': 1} {'type': 'loss', 'content': 0.20448504388332367, 'timestamp': '2025-09-10 02:42:39.115316', 'step': 7104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:39.170446', 'step': 7104, 'epoch': 1} {'type': 'loss', 'content': 0.10342196375131607, 'timestamp': '2025-09-10 02:42:39.172706', 'step': 7105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:39.227357', 'step': 7105, 'epoch': 1} {'type': 'loss', 'content': 0.1603676825761795, 'timestamp': '2025-09-10 02:42:39.229574', 'step': 7106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:39.284345', 'step': 7106, 'epoch': 1} {'type': 'loss', 'content': 0.12441898137331009, 'timestamp': '2025-09-10 02:42:39.286544', 'step': 7107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:39.340606', 'step': 7107, 'epoch': 1} {'type': 'loss', 'content': 0.19649256765842438, 'timestamp': '2025-09-10 02:42:39.346871', 'step': 7108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:39.400306', 'step': 7108, 'epoch': 1} {'type': 'loss', 'content': 0.10843127220869064, 'timestamp': '2025-09-10 02:42:39.402536', 'step': 7109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:39.456826', 'step': 7109, 'epoch': 1} {'type': 'loss', 'content': 0.14386209845542908, 'timestamp': '2025-09-10 02:42:39.459120', 'step': 7110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:39.513066', 'step': 7110, 'epoch': 1} {'type': 'loss', 'content': 0.1437140554189682, 'timestamp': '2025-09-10 02:42:39.515189', 'step': 7111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:39.572420', 'step': 7111, 'epoch': 1} {'type': 'loss', 'content': 0.19607210159301758, 'timestamp': '2025-09-10 02:42:39.578349', 'step': 7112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:39.631110', 'step': 7112, 'epoch': 1} {'type': 'loss', 'content': 0.1458619236946106, 'timestamp': '2025-09-10 02:42:39.633215', 'step': 7113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:39.686837', 'step': 7113, 'epoch': 1} {'type': 'loss', 'content': 0.0969042032957077, 'timestamp': '2025-09-10 02:42:39.688863', 'step': 7114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:39.744152', 'step': 7114, 'epoch': 1} {'type': 'loss', 'content': 0.18518893420696259, 'timestamp': '2025-09-10 02:42:39.746335', 'step': 7115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:39.800369', 'step': 7115, 'epoch': 1} {'type': 'loss', 'content': 0.0998305007815361, 'timestamp': '2025-09-10 02:42:39.806491', 'step': 7116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:39.860738', 'step': 7116, 'epoch': 1} {'type': 'loss', 'content': 0.20895089209079742, 'timestamp': '2025-09-10 02:42:39.862939', 'step': 7117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:39.917072', 'step': 7117, 'epoch': 1} {'type': 'loss', 'content': 0.2389371693134308, 'timestamp': '2025-09-10 02:42:39.919143', 'step': 7118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:39.973254', 'step': 7118, 'epoch': 1} {'type': 'loss', 'content': 0.08766698837280273, 'timestamp': '2025-09-10 02:42:39.975174', 'step': 7119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:40.029798', 'step': 7119, 'epoch': 1} {'type': 'loss', 'content': 0.11966992914676666, 'timestamp': '2025-09-10 02:42:40.035905', 'step': 7120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:40.089211', 'step': 7120, 'epoch': 1} {'type': 'loss', 'content': 0.1474187970161438, 'timestamp': '2025-09-10 02:42:40.091363', 'step': 7121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:40.145246', 'step': 7121, 'epoch': 1} {'type': 'loss', 'content': 0.11794497817754745, 'timestamp': '2025-09-10 02:42:40.147461', 'step': 7122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:40.201914', 'step': 7122, 'epoch': 1} {'type': 'loss', 'content': 0.14013566076755524, 'timestamp': '2025-09-10 02:42:40.204195', 'step': 7123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:40.258584', 'step': 7123, 'epoch': 1} {'type': 'loss', 'content': 0.13649040460586548, 'timestamp': '2025-09-10 02:42:40.265444', 'step': 7124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:40.320160', 'step': 7124, 'epoch': 1} {'type': 'loss', 'content': 0.09993179887533188, 'timestamp': '2025-09-10 02:42:40.322520', 'step': 7125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:40.376139', 'step': 7125, 'epoch': 1} {'type': 'loss', 'content': 0.10677755624055862, 'timestamp': '2025-09-10 02:42:40.378191', 'step': 7126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:40.431803', 'step': 7126, 'epoch': 1} {'type': 'loss', 'content': 0.11059840768575668, 'timestamp': '2025-09-10 02:42:40.433984', 'step': 7127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:40.487036', 'step': 7127, 'epoch': 1} {'type': 'loss', 'content': 0.24841147661209106, 'timestamp': '2025-09-10 02:42:40.492976', 'step': 7128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:40.546439', 'step': 7128, 'epoch': 1} {'type': 'loss', 'content': 0.13751544058322906, 'timestamp': '2025-09-10 02:42:40.548524', 'step': 7129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:40.601848', 'step': 7129, 'epoch': 1} {'type': 'loss', 'content': 0.2228900045156479, 'timestamp': '2025-09-10 02:42:40.604043', 'step': 7130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:40.659385', 'step': 7130, 'epoch': 1} {'type': 'loss', 'content': 0.10720570385456085, 'timestamp': '2025-09-10 02:42:40.661636', 'step': 7131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:40.717829', 'step': 7131, 'epoch': 1} {'type': 'loss', 'content': 0.19636879861354828, 'timestamp': '2025-09-10 02:42:40.724027', 'step': 7132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:40.779767', 'step': 7132, 'epoch': 1} {'type': 'loss', 'content': 0.14749236404895782, 'timestamp': '2025-09-10 02:42:40.782129', 'step': 7133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:40.836862', 'step': 7133, 'epoch': 1} {'type': 'loss', 'content': 0.12021921575069427, 'timestamp': '2025-09-10 02:42:40.839366', 'step': 7134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:40.893690', 'step': 7134, 'epoch': 1} {'type': 'loss', 'content': 0.17313630878925323, 'timestamp': '2025-09-10 02:42:40.895686', 'step': 7135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:40.949777', 'step': 7135, 'epoch': 1} {'type': 'loss', 'content': 0.14204169809818268, 'timestamp': '2025-09-10 02:42:40.955460', 'step': 7136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:41.009176', 'step': 7136, 'epoch': 1} {'type': 'loss', 'content': 0.10548339784145355, 'timestamp': '2025-09-10 02:42:41.010939', 'step': 7137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:41.064816', 'step': 7137, 'epoch': 1} {'type': 'loss', 'content': 0.10083483159542084, 'timestamp': '2025-09-10 02:42:41.066526', 'step': 7138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:41.120443', 'step': 7138, 'epoch': 1} {'type': 'loss', 'content': 0.07733183354139328, 'timestamp': '2025-09-10 02:42:41.122639', 'step': 7139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:41.177315', 'step': 7139, 'epoch': 1} {'type': 'loss', 'content': 0.1340283304452896, 'timestamp': '2025-09-10 02:42:41.183163', 'step': 7140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:41.236704', 'step': 7140, 'epoch': 1} {'type': 'loss', 'content': 0.12837032973766327, 'timestamp': '2025-09-10 02:42:41.238683', 'step': 7141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:41.293837', 'step': 7141, 'epoch': 1} {'type': 'loss', 'content': 0.09729151427745819, 'timestamp': '2025-09-10 02:42:41.295736', 'step': 7142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:41.349167', 'step': 7142, 'epoch': 1} {'type': 'loss', 'content': 0.20358800888061523, 'timestamp': '2025-09-10 02:42:41.351123', 'step': 7143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:41.405676', 'step': 7143, 'epoch': 1} {'type': 'loss', 'content': 0.16120383143424988, 'timestamp': '2025-09-10 02:42:41.411792', 'step': 7144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:41.465679', 'step': 7144, 'epoch': 1} {'type': 'loss', 'content': 0.11129672080278397, 'timestamp': '2025-09-10 02:42:41.467811', 'step': 7145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:41.523241', 'step': 7145, 'epoch': 1} {'type': 'loss', 'content': 0.18583397567272186, 'timestamp': '2025-09-10 02:42:41.525464', 'step': 7146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:41.586838', 'step': 7146, 'epoch': 1} {'type': 'loss', 'content': 0.09507034718990326, 'timestamp': '2025-09-10 02:42:41.588958', 'step': 7147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:41.644731', 'step': 7147, 'epoch': 1} {'type': 'loss', 'content': 0.08299683034420013, 'timestamp': '2025-09-10 02:42:41.655077', 'step': 7148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:41.710758', 'step': 7148, 'epoch': 1} {'type': 'loss', 'content': 0.18455229699611664, 'timestamp': '2025-09-10 02:42:41.712461', 'step': 7149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:41.766611', 'step': 7149, 'epoch': 1} {'type': 'loss', 'content': 0.12914904952049255, 'timestamp': '2025-09-10 02:42:41.768770', 'step': 7150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:41.823456', 'step': 7150, 'epoch': 1} {'type': 'loss', 'content': 0.1474049687385559, 'timestamp': '2025-09-10 02:42:41.825493', 'step': 7151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:41.879817', 'step': 7151, 'epoch': 1} {'type': 'loss', 'content': 0.2155734896659851, 'timestamp': '2025-09-10 02:42:41.886109', 'step': 7152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:41.942784', 'step': 7152, 'epoch': 1} {'type': 'loss', 'content': 0.11996980756521225, 'timestamp': '2025-09-10 02:42:41.946454', 'step': 7153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:42.004388', 'step': 7153, 'epoch': 1} {'type': 'loss', 'content': 0.12819640338420868, 'timestamp': '2025-09-10 02:42:42.006673', 'step': 7154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:42.060858', 'step': 7154, 'epoch': 1} {'type': 'loss', 'content': 0.11043679714202881, 'timestamp': '2025-09-10 02:42:42.062756', 'step': 7155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:42.116613', 'step': 7155, 'epoch': 1} {'type': 'loss', 'content': 0.11669427901506424, 'timestamp': '2025-09-10 02:42:42.122543', 'step': 7156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:42.176856', 'step': 7156, 'epoch': 1} {'type': 'loss', 'content': 0.13397447764873505, 'timestamp': '2025-09-10 02:42:42.179851', 'step': 7157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:42.236194', 'step': 7157, 'epoch': 1} {'type': 'loss', 'content': 0.19685570895671844, 'timestamp': '2025-09-10 02:42:42.238026', 'step': 7158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:42.291505', 'step': 7158, 'epoch': 1} {'type': 'loss', 'content': 0.2065512090921402, 'timestamp': '2025-09-10 02:42:42.293591', 'step': 7159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:42.349036', 'step': 7159, 'epoch': 1} {'type': 'loss', 'content': 0.10485449433326721, 'timestamp': '2025-09-10 02:42:42.359510', 'step': 7160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:42.416448', 'step': 7160, 'epoch': 1} {'type': 'loss', 'content': 0.27537620067596436, 'timestamp': '2025-09-10 02:42:42.418725', 'step': 7161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:42.474318', 'step': 7161, 'epoch': 1} {'type': 'loss', 'content': 0.15469112992286682, 'timestamp': '2025-09-10 02:42:42.476551', 'step': 7162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:42.530383', 'step': 7162, 'epoch': 1} {'type': 'loss', 'content': 0.19394339621067047, 'timestamp': '2025-09-10 02:42:42.532494', 'step': 7163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:42.589902', 'step': 7163, 'epoch': 1} {'type': 'loss', 'content': 0.14451806247234344, 'timestamp': '2025-09-10 02:42:42.595829', 'step': 7164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:42.650050', 'step': 7164, 'epoch': 1} {'type': 'loss', 'content': 0.14328032732009888, 'timestamp': '2025-09-10 02:42:42.651813', 'step': 7165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:42.705312', 'step': 7165, 'epoch': 1} {'type': 'loss', 'content': 0.11311905831098557, 'timestamp': '2025-09-10 02:42:42.707112', 'step': 7166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:42.760868', 'step': 7166, 'epoch': 1} {'type': 'loss', 'content': 0.2529635429382324, 'timestamp': '2025-09-10 02:42:42.762849', 'step': 7167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:42.817031', 'step': 7167, 'epoch': 1} {'type': 'loss', 'content': 0.15079689025878906, 'timestamp': '2025-09-10 02:42:42.823082', 'step': 7168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:42.876477', 'step': 7168, 'epoch': 1} {'type': 'loss', 'content': 0.19023704528808594, 'timestamp': '2025-09-10 02:42:42.878527', 'step': 7169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:42.931909', 'step': 7169, 'epoch': 1} {'type': 'loss', 'content': 0.1040702685713768, 'timestamp': '2025-09-10 02:42:42.933716', 'step': 7170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:42.988280', 'step': 7170, 'epoch': 1} {'type': 'loss', 'content': 0.11802763491868973, 'timestamp': '2025-09-10 02:42:42.990441', 'step': 7171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:43.048521', 'step': 7171, 'epoch': 1} {'type': 'loss', 'content': 0.2108864039182663, 'timestamp': '2025-09-10 02:42:43.057991', 'step': 7172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:43.112894', 'step': 7172, 'epoch': 1} {'type': 'loss', 'content': 0.16729438304901123, 'timestamp': '2025-09-10 02:42:43.114702', 'step': 7173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:43.169997', 'step': 7173, 'epoch': 1} {'type': 'loss', 'content': 0.1397024691104889, 'timestamp': '2025-09-10 02:42:43.172237', 'step': 7174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:43.226399', 'step': 7174, 'epoch': 1} {'type': 'loss', 'content': 0.13479535281658173, 'timestamp': '2025-09-10 02:42:43.229817', 'step': 7175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:43.288690', 'step': 7175, 'epoch': 1} {'type': 'loss', 'content': 0.17702655494213104, 'timestamp': '2025-09-10 02:42:43.294395', 'step': 7176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:43.347630', 'step': 7176, 'epoch': 1} {'type': 'loss', 'content': 0.17036207020282745, 'timestamp': '2025-09-10 02:42:43.349386', 'step': 7177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:43.402395', 'step': 7177, 'epoch': 1} {'type': 'loss', 'content': 0.12094449996948242, 'timestamp': '2025-09-10 02:42:43.406757', 'step': 7178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:43.466207', 'step': 7178, 'epoch': 1} {'type': 'loss', 'content': 0.09879475086927414, 'timestamp': '2025-09-10 02:42:43.475657', 'step': 7179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:43.536842', 'step': 7179, 'epoch': 1} {'type': 'loss', 'content': 0.13908743858337402, 'timestamp': '2025-09-10 02:42:43.542748', 'step': 7180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:43.600049', 'step': 7180, 'epoch': 1} {'type': 'loss', 'content': 0.2077520787715912, 'timestamp': '2025-09-10 02:42:43.601785', 'step': 7181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:43.660412', 'step': 7181, 'epoch': 1} {'type': 'loss', 'content': 0.14428246021270752, 'timestamp': '2025-09-10 02:42:43.662422', 'step': 7182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:43.718158', 'step': 7182, 'epoch': 1} {'type': 'loss', 'content': 0.14449341595172882, 'timestamp': '2025-09-10 02:42:43.722893', 'step': 7183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:43.780105', 'step': 7183, 'epoch': 1} {'type': 'loss', 'content': 0.20666155219078064, 'timestamp': '2025-09-10 02:42:43.785778', 'step': 7184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:43.842386', 'step': 7184, 'epoch': 1} {'type': 'loss', 'content': 0.18822385370731354, 'timestamp': '2025-09-10 02:42:43.844313', 'step': 7185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:43.901656', 'step': 7185, 'epoch': 1} {'type': 'loss', 'content': 0.1059824526309967, 'timestamp': '2025-09-10 02:42:43.903896', 'step': 7186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:43.975852', 'step': 7186, 'epoch': 1} {'type': 'loss', 'content': 0.18681751191616058, 'timestamp': '2025-09-10 02:42:43.977798', 'step': 7187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:44.032689', 'step': 7187, 'epoch': 1} {'type': 'loss', 'content': 0.15688085556030273, 'timestamp': '2025-09-10 02:42:44.038165', 'step': 7188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:44.097298', 'step': 7188, 'epoch': 1} {'type': 'loss', 'content': 0.2652741074562073, 'timestamp': '2025-09-10 02:42:44.100094', 'step': 7189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:44.156183', 'step': 7189, 'epoch': 1} {'type': 'loss', 'content': 0.11620708554983139, 'timestamp': '2025-09-10 02:42:44.163909', 'step': 7190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:44.226950', 'step': 7190, 'epoch': 1} {'type': 'loss', 'content': 0.17998884618282318, 'timestamp': '2025-09-10 02:42:44.229073', 'step': 7191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:44.287691', 'step': 7191, 'epoch': 1} {'type': 'loss', 'content': 0.1752844899892807, 'timestamp': '2025-09-10 02:42:44.293557', 'step': 7192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:44.348697', 'step': 7192, 'epoch': 1} {'type': 'loss', 'content': 0.19068121910095215, 'timestamp': '2025-09-10 02:42:44.351700', 'step': 7193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:44.408084', 'step': 7193, 'epoch': 1} {'type': 'loss', 'content': 0.15537522733211517, 'timestamp': '2025-09-10 02:42:44.410200', 'step': 7194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:44.464798', 'step': 7194, 'epoch': 1} {'type': 'loss', 'content': 0.20619024336338043, 'timestamp': '2025-09-10 02:42:44.466661', 'step': 7195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:44.520506', 'step': 7195, 'epoch': 1} {'type': 'loss', 'content': 0.1679590493440628, 'timestamp': '2025-09-10 02:42:44.526851', 'step': 7196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:44.584893', 'step': 7196, 'epoch': 1} {'type': 'loss', 'content': 0.19097506999969482, 'timestamp': '2025-09-10 02:42:44.586547', 'step': 7197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:44.641965', 'step': 7197, 'epoch': 1} {'type': 'loss', 'content': 0.07560092210769653, 'timestamp': '2025-09-10 02:42:44.644368', 'step': 7198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:44.703442', 'step': 7198, 'epoch': 1} {'type': 'loss', 'content': 0.13508813083171844, 'timestamp': '2025-09-10 02:42:44.705852', 'step': 7199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:44.760423', 'step': 7199, 'epoch': 1} {'type': 'loss', 'content': 0.1251940280199051, 'timestamp': '2025-09-10 02:42:44.770625', 'step': 7200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:44.827493', 'step': 7200, 'epoch': 1} {'type': 'loss', 'content': 0.06751846522092819, 'timestamp': '2025-09-10 02:42:44.829465', 'step': 7201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:44.888002', 'step': 7201, 'epoch': 1} {'type': 'loss', 'content': 0.14021605253219604, 'timestamp': '2025-09-10 02:42:44.890290', 'step': 7202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:44.945307', 'step': 7202, 'epoch': 1} {'type': 'loss', 'content': 0.2429480105638504, 'timestamp': '2025-09-10 02:42:44.947588', 'step': 7203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:45.001202', 'step': 7203, 'epoch': 1} {'type': 'loss', 'content': 0.19645142555236816, 'timestamp': '2025-09-10 02:42:45.006880', 'step': 7204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:45.070310', 'step': 7204, 'epoch': 1} {'type': 'loss', 'content': 0.2257310152053833, 'timestamp': '2025-09-10 02:42:45.072197', 'step': 7205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:45.126092', 'step': 7205, 'epoch': 1} {'type': 'loss', 'content': 0.14159773290157318, 'timestamp': '2025-09-10 02:42:45.128166', 'step': 7206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:45.190908', 'step': 7206, 'epoch': 1} {'type': 'loss', 'content': 0.23112745583057404, 'timestamp': '2025-09-10 02:42:45.193200', 'step': 7207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:45.249946', 'step': 7207, 'epoch': 1} {'type': 'loss', 'content': 0.1758236140012741, 'timestamp': '2025-09-10 02:42:45.256421', 'step': 7208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:45.310762', 'step': 7208, 'epoch': 1} {'type': 'loss', 'content': 0.1634998768568039, 'timestamp': '2025-09-10 02:42:45.314296', 'step': 7209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:45.369606', 'step': 7209, 'epoch': 1} {'type': 'loss', 'content': 0.14947906136512756, 'timestamp': '2025-09-10 02:42:45.379116', 'step': 7210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:45.440140', 'step': 7210, 'epoch': 1} {'type': 'loss', 'content': 0.09162867814302444, 'timestamp': '2025-09-10 02:42:45.442459', 'step': 7211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:45.499216', 'step': 7211, 'epoch': 1} {'type': 'loss', 'content': 0.12167447060346603, 'timestamp': '2025-09-10 02:42:45.505223', 'step': 7212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:45.559102', 'step': 7212, 'epoch': 1} {'type': 'loss', 'content': 0.15914811193943024, 'timestamp': '2025-09-10 02:42:45.561756', 'step': 7213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:45.617523', 'step': 7213, 'epoch': 1} {'type': 'loss', 'content': 0.18359868228435516, 'timestamp': '2025-09-10 02:42:45.622889', 'step': 7214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:45.681277', 'step': 7214, 'epoch': 1} {'type': 'loss', 'content': 0.10965639352798462, 'timestamp': '2025-09-10 02:42:45.682960', 'step': 7215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:45.738421', 'step': 7215, 'epoch': 1} {'type': 'loss', 'content': 0.1340237855911255, 'timestamp': '2025-09-10 02:42:45.744586', 'step': 7216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:45.807025', 'step': 7216, 'epoch': 1} {'type': 'loss', 'content': 0.17152220010757446, 'timestamp': '2025-09-10 02:42:45.809254', 'step': 7217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:45.864445', 'step': 7217, 'epoch': 1} {'type': 'loss', 'content': 0.09845871478319168, 'timestamp': '2025-09-10 02:42:45.869665', 'step': 7218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:45.932631', 'step': 7218, 'epoch': 1} {'type': 'loss', 'content': 0.14893798530101776, 'timestamp': '2025-09-10 02:42:45.934979', 'step': 7219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:45.991240', 'step': 7219, 'epoch': 1} {'type': 'loss', 'content': 0.15366266667842865, 'timestamp': '2025-09-10 02:42:45.996939', 'step': 7220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:46.050993', 'step': 7220, 'epoch': 1} {'type': 'loss', 'content': 0.16887588798999786, 'timestamp': '2025-09-10 02:42:46.056500', 'step': 7221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:46.118607', 'step': 7221, 'epoch': 1} {'type': 'loss', 'content': 0.14636646211147308, 'timestamp': '2025-09-10 02:42:46.123931', 'step': 7222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:46.182695', 'step': 7222, 'epoch': 1} {'type': 'loss', 'content': 0.16304275393486023, 'timestamp': '2025-09-10 02:42:46.184715', 'step': 7223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:46.243779', 'step': 7223, 'epoch': 1} {'type': 'loss', 'content': 0.11164939403533936, 'timestamp': '2025-09-10 02:42:46.249853', 'step': 7224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:46.303923', 'step': 7224, 'epoch': 1} {'type': 'loss', 'content': 0.19475387036800385, 'timestamp': '2025-09-10 02:42:46.306202', 'step': 7225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:46.374953', 'step': 7225, 'epoch': 1} {'type': 'loss', 'content': 0.1815342754125595, 'timestamp': '2025-09-10 02:42:46.376838', 'step': 7226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:46.440724', 'step': 7226, 'epoch': 1} {'type': 'loss', 'content': 0.1488509476184845, 'timestamp': '2025-09-10 02:42:46.442670', 'step': 7227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:46.498823', 'step': 7227, 'epoch': 1} {'type': 'loss', 'content': 0.10628505051136017, 'timestamp': '2025-09-10 02:42:46.504688', 'step': 7228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:46.561854', 'step': 7228, 'epoch': 1} {'type': 'loss', 'content': 0.13667050004005432, 'timestamp': '2025-09-10 02:42:46.564142', 'step': 7229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:46.618532', 'step': 7229, 'epoch': 1} {'type': 'loss', 'content': 0.1142703965306282, 'timestamp': '2025-09-10 02:42:46.620716', 'step': 7230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:46.677624', 'step': 7230, 'epoch': 1} {'type': 'loss', 'content': 0.17594625055789948, 'timestamp': '2025-09-10 02:42:46.679843', 'step': 7231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:46.744514', 'step': 7231, 'epoch': 1} {'type': 'loss', 'content': 0.11459286510944366, 'timestamp': '2025-09-10 02:42:46.750602', 'step': 7232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:46.827741', 'step': 7232, 'epoch': 1} {'type': 'loss', 'content': 0.1245211511850357, 'timestamp': '2025-09-10 02:42:46.829943', 'step': 7233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:46.894060', 'step': 7233, 'epoch': 1} {'type': 'loss', 'content': 0.09584440290927887, 'timestamp': '2025-09-10 02:42:46.896291', 'step': 7234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:47.014818', 'step': 7234, 'epoch': 1} {'type': 'loss', 'content': 0.0901910662651062, 'timestamp': '2025-09-10 02:42:47.019143', 'step': 7235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:47.096350', 'step': 7235, 'epoch': 1} {'type': 'loss', 'content': 0.12354271858930588, 'timestamp': '2025-09-10 02:42:47.102665', 'step': 7236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:47.172761', 'step': 7236, 'epoch': 1} {'type': 'loss', 'content': 0.12605872750282288, 'timestamp': '2025-09-10 02:42:47.175140', 'step': 7237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:47.230522', 'step': 7237, 'epoch': 1} {'type': 'loss', 'content': 0.09062808752059937, 'timestamp': '2025-09-10 02:42:47.233032', 'step': 7238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:47.292335', 'step': 7238, 'epoch': 1} {'type': 'loss', 'content': 0.14186809957027435, 'timestamp': '2025-09-10 02:42:47.294677', 'step': 7239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:47.352398', 'step': 7239, 'epoch': 1} {'type': 'loss', 'content': 0.20919424295425415, 'timestamp': '2025-09-10 02:42:47.359360', 'step': 7240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:47.420577', 'step': 7240, 'epoch': 1} {'type': 'loss', 'content': 0.14014926552772522, 'timestamp': '2025-09-10 02:42:47.425348', 'step': 7241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:47.490899', 'step': 7241, 'epoch': 1} {'type': 'loss', 'content': 0.07659163326025009, 'timestamp': '2025-09-10 02:42:47.493076', 'step': 7242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:47.547623', 'step': 7242, 'epoch': 1} {'type': 'loss', 'content': 0.21206554770469666, 'timestamp': '2025-09-10 02:42:47.549865', 'step': 7243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:47.604372', 'step': 7243, 'epoch': 1} {'type': 'loss', 'content': 0.11125333607196808, 'timestamp': '2025-09-10 02:42:47.610620', 'step': 7244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:47.664942', 'step': 7244, 'epoch': 1} {'type': 'loss', 'content': 0.1958424299955368, 'timestamp': '2025-09-10 02:42:47.667067', 'step': 7245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:47.721261', 'step': 7245, 'epoch': 1} {'type': 'loss', 'content': 0.1655379831790924, 'timestamp': '2025-09-10 02:42:47.723062', 'step': 7246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:47.778469', 'step': 7246, 'epoch': 1} {'type': 'loss', 'content': 0.09061521291732788, 'timestamp': '2025-09-10 02:42:47.787342', 'step': 7247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:47.849912', 'step': 7247, 'epoch': 1} {'type': 'loss', 'content': 0.19101519882678986, 'timestamp': '2025-09-10 02:42:47.856184', 'step': 7248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:47.911942', 'step': 7248, 'epoch': 1} {'type': 'loss', 'content': 0.10743138939142227, 'timestamp': '2025-09-10 02:42:47.914084', 'step': 7249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:47.983813', 'step': 7249, 'epoch': 1} {'type': 'loss', 'content': 0.11364971846342087, 'timestamp': '2025-09-10 02:42:47.986016', 'step': 7250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:48.046693', 'step': 7250, 'epoch': 1} {'type': 'loss', 'content': 0.1085081472992897, 'timestamp': '2025-09-10 02:42:48.048855', 'step': 7251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:48.103924', 'step': 7251, 'epoch': 1} {'type': 'loss', 'content': 0.1157822459936142, 'timestamp': '2025-09-10 02:42:48.112575', 'step': 7252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:48.174344', 'step': 7252, 'epoch': 1} {'type': 'loss', 'content': 0.18341776728630066, 'timestamp': '2025-09-10 02:42:48.180001', 'step': 7253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:48.239148', 'step': 7253, 'epoch': 1} {'type': 'loss', 'content': 0.14322908222675323, 'timestamp': '2025-09-10 02:42:48.241673', 'step': 7254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:48.298815', 'step': 7254, 'epoch': 1} {'type': 'loss', 'content': 0.10513874143362045, 'timestamp': '2025-09-10 02:42:48.301558', 'step': 7255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:48.362337', 'step': 7255, 'epoch': 1} {'type': 'loss', 'content': 0.17738007009029388, 'timestamp': '2025-09-10 02:42:48.368753', 'step': 7256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:48.427440', 'step': 7256, 'epoch': 1} {'type': 'loss', 'content': 0.13030056655406952, 'timestamp': '2025-09-10 02:42:48.433083', 'step': 7257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:48.492247', 'step': 7257, 'epoch': 1} {'type': 'loss', 'content': 0.2120198905467987, 'timestamp': '2025-09-10 02:42:48.494441', 'step': 7258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:48.556846', 'step': 7258, 'epoch': 1} {'type': 'loss', 'content': 0.18162833154201508, 'timestamp': '2025-09-10 02:42:48.559080', 'step': 7259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:48.620660', 'step': 7259, 'epoch': 1} {'type': 'loss', 'content': 0.2385486364364624, 'timestamp': '2025-09-10 02:42:48.628552', 'step': 7260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:48.707941', 'step': 7260, 'epoch': 1} {'type': 'loss', 'content': 0.2911739945411682, 'timestamp': '2025-09-10 02:42:48.710072', 'step': 7261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:48.771766', 'step': 7261, 'epoch': 1} {'type': 'loss', 'content': 0.11762602627277374, 'timestamp': '2025-09-10 02:42:48.776647', 'step': 7262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:48.836945', 'step': 7262, 'epoch': 1} {'type': 'loss', 'content': 0.16601963341236115, 'timestamp': '2025-09-10 02:42:48.839147', 'step': 7263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:48.899843', 'step': 7263, 'epoch': 1} {'type': 'loss', 'content': 0.1356191486120224, 'timestamp': '2025-09-10 02:42:48.909433', 'step': 7264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:48.979894', 'step': 7264, 'epoch': 1} {'type': 'loss', 'content': 0.21045978367328644, 'timestamp': '2025-09-10 02:42:48.981840', 'step': 7265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:49.036749', 'step': 7265, 'epoch': 1} {'type': 'loss', 'content': 0.15098024904727936, 'timestamp': '2025-09-10 02:42:49.038731', 'step': 7266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:49.121510', 'step': 7266, 'epoch': 1} {'type': 'loss', 'content': 0.07010677456855774, 'timestamp': '2025-09-10 02:42:49.125267', 'step': 7267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:49.224870', 'step': 7267, 'epoch': 1} {'type': 'loss', 'content': 0.11133667081594467, 'timestamp': '2025-09-10 02:42:49.230921', 'step': 7268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:49.327560', 'step': 7268, 'epoch': 1} {'type': 'loss', 'content': 0.26319631934165955, 'timestamp': '2025-09-10 02:42:49.330093', 'step': 7269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:49.387199', 'step': 7269, 'epoch': 1} {'type': 'loss', 'content': 0.150132954120636, 'timestamp': '2025-09-10 02:42:49.390616', 'step': 7270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:49.448692', 'step': 7270, 'epoch': 1} {'type': 'loss', 'content': 0.19604036211967468, 'timestamp': '2025-09-10 02:42:49.450892', 'step': 7271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:49.515583', 'step': 7271, 'epoch': 1} {'type': 'loss', 'content': 0.15616998076438904, 'timestamp': '2025-09-10 02:42:49.521601', 'step': 7272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:49.580615', 'step': 7272, 'epoch': 1} {'type': 'loss', 'content': 0.15003015100955963, 'timestamp': '2025-09-10 02:42:49.585511', 'step': 7273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:49.646882', 'step': 7273, 'epoch': 1} {'type': 'loss', 'content': 0.13770151138305664, 'timestamp': '2025-09-10 02:42:49.649112', 'step': 7274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:49.703293', 'step': 7274, 'epoch': 1} {'type': 'loss', 'content': 0.08154875785112381, 'timestamp': '2025-09-10 02:42:49.706483', 'step': 7275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:49.765189', 'step': 7275, 'epoch': 1} {'type': 'loss', 'content': 0.1837252676486969, 'timestamp': '2025-09-10 02:42:49.771326', 'step': 7276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:49.824681', 'step': 7276, 'epoch': 1} {'type': 'loss', 'content': 0.09492313861846924, 'timestamp': '2025-09-10 02:42:49.827118', 'step': 7277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:49.881238', 'step': 7277, 'epoch': 1} {'type': 'loss', 'content': 0.20954912900924683, 'timestamp': '2025-09-10 02:42:49.883449', 'step': 7278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:49.939631', 'step': 7278, 'epoch': 1} {'type': 'loss', 'content': 0.11403746902942657, 'timestamp': '2025-09-10 02:42:49.941819', 'step': 7279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:50.000720', 'step': 7279, 'epoch': 1} {'type': 'loss', 'content': 0.23719477653503418, 'timestamp': '2025-09-10 02:42:50.006811', 'step': 7280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:50.060839', 'step': 7280, 'epoch': 1} {'type': 'loss', 'content': 0.2721875309944153, 'timestamp': '2025-09-10 02:42:50.063044', 'step': 7281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:50.120527', 'step': 7281, 'epoch': 1} {'type': 'loss', 'content': 0.11700516939163208, 'timestamp': '2025-09-10 02:42:50.124422', 'step': 7282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:50.181676', 'step': 7282, 'epoch': 1} {'type': 'loss', 'content': 0.1719130128622055, 'timestamp': '2025-09-10 02:42:50.184205', 'step': 7283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:50.244352', 'step': 7283, 'epoch': 1} {'type': 'loss', 'content': 0.07227518409490585, 'timestamp': '2025-09-10 02:42:50.250674', 'step': 7284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:50.307910', 'step': 7284, 'epoch': 1} {'type': 'loss', 'content': 0.058024000376462936, 'timestamp': '2025-09-10 02:42:50.310143', 'step': 7285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:50.372547', 'step': 7285, 'epoch': 1} {'type': 'loss', 'content': 0.12747988104820251, 'timestamp': '2025-09-10 02:42:50.375469', 'step': 7286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:50.437760', 'step': 7286, 'epoch': 1} {'type': 'loss', 'content': 0.11323441565036774, 'timestamp': '2025-09-10 02:42:50.440804', 'step': 7287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:50.500608', 'step': 7287, 'epoch': 1} {'type': 'loss', 'content': 0.1319715678691864, 'timestamp': '2025-09-10 02:42:50.507402', 'step': 7288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:50.562370', 'step': 7288, 'epoch': 1} {'type': 'loss', 'content': 0.09028506278991699, 'timestamp': '2025-09-10 02:42:50.564668', 'step': 7289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:50.620595', 'step': 7289, 'epoch': 1} {'type': 'loss', 'content': 0.170747771859169, 'timestamp': '2025-09-10 02:42:50.622633', 'step': 7290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:50.694917', 'step': 7290, 'epoch': 1} {'type': 'loss', 'content': 0.1915271133184433, 'timestamp': '2025-09-10 02:42:50.700419', 'step': 7291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:50.762174', 'step': 7291, 'epoch': 1} {'type': 'loss', 'content': 0.18580666184425354, 'timestamp': '2025-09-10 02:42:50.767893', 'step': 7292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:50.823854', 'step': 7292, 'epoch': 1} {'type': 'loss', 'content': 0.15237700939178467, 'timestamp': '2025-09-10 02:42:50.825921', 'step': 7293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:50.900142', 'step': 7293, 'epoch': 1} {'type': 'loss', 'content': 0.10733123123645782, 'timestamp': '2025-09-10 02:42:50.902283', 'step': 7294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:50.968080', 'step': 7294, 'epoch': 1} {'type': 'loss', 'content': 0.1546654850244522, 'timestamp': '2025-09-10 02:42:50.974213', 'step': 7295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:51.031530', 'step': 7295, 'epoch': 1} {'type': 'loss', 'content': 0.09660375118255615, 'timestamp': '2025-09-10 02:42:51.037487', 'step': 7296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:51.096375', 'step': 7296, 'epoch': 1} {'type': 'loss', 'content': 0.14776301383972168, 'timestamp': '2025-09-10 02:42:51.098598', 'step': 7297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:51.155878', 'step': 7297, 'epoch': 1} {'type': 'loss', 'content': 0.1583324670791626, 'timestamp': '2025-09-10 02:42:51.158282', 'step': 7298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:51.216345', 'step': 7298, 'epoch': 1} {'type': 'loss', 'content': 0.09819518029689789, 'timestamp': '2025-09-10 02:42:51.218683', 'step': 7299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:51.279366', 'step': 7299, 'epoch': 1} {'type': 'loss', 'content': 0.11104094237089157, 'timestamp': '2025-09-10 02:42:51.293003', 'step': 7300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:51.386747', 'step': 7300, 'epoch': 1} {'type': 'loss', 'content': 0.17480280995368958, 'timestamp': '2025-09-10 02:42:51.388890', 'step': 7301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:51.480370', 'step': 7301, 'epoch': 1} {'type': 'loss', 'content': 0.16388079524040222, 'timestamp': '2025-09-10 02:42:51.483390', 'step': 7302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:51.542152', 'step': 7302, 'epoch': 1} {'type': 'loss', 'content': 0.1738138496875763, 'timestamp': '2025-09-10 02:42:51.544363', 'step': 7303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:51.604170', 'step': 7303, 'epoch': 1} {'type': 'loss', 'content': 0.11699596047401428, 'timestamp': '2025-09-10 02:42:51.610210', 'step': 7304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:51.699298', 'step': 7304, 'epoch': 1} {'type': 'loss', 'content': 0.16154441237449646, 'timestamp': '2025-09-10 02:42:51.701629', 'step': 7305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:51.756086', 'step': 7305, 'epoch': 1} {'type': 'loss', 'content': 0.2105233371257782, 'timestamp': '2025-09-10 02:42:51.759190', 'step': 7306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:51.824014', 'step': 7306, 'epoch': 1} {'type': 'loss', 'content': 0.15480466187000275, 'timestamp': '2025-09-10 02:42:51.825945', 'step': 7307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:51.887796', 'step': 7307, 'epoch': 1} {'type': 'loss', 'content': 0.1550227850675583, 'timestamp': '2025-09-10 02:42:51.894291', 'step': 7308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:51.955904', 'step': 7308, 'epoch': 1} {'type': 'loss', 'content': 0.18616797029972076, 'timestamp': '2025-09-10 02:42:51.958057', 'step': 7309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:52.021137', 'step': 7309, 'epoch': 1} {'type': 'loss', 'content': 0.19000352919101715, 'timestamp': '2025-09-10 02:42:52.023274', 'step': 7310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:52.076331', 'step': 7310, 'epoch': 1} {'type': 'loss', 'content': 0.15016765892505646, 'timestamp': '2025-09-10 02:42:52.079871', 'step': 7311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:52.142519', 'step': 7311, 'epoch': 1} {'type': 'loss', 'content': 0.1958405077457428, 'timestamp': '2025-09-10 02:42:52.148532', 'step': 7312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:52.202104', 'step': 7312, 'epoch': 1} {'type': 'loss', 'content': 0.15487685799598694, 'timestamp': '2025-09-10 02:42:52.204390', 'step': 7313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:52.259575', 'step': 7313, 'epoch': 1} {'type': 'loss', 'content': 0.20088869333267212, 'timestamp': '2025-09-10 02:42:52.261780', 'step': 7314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:52.318036', 'step': 7314, 'epoch': 1} {'type': 'loss', 'content': 0.19589577615261078, 'timestamp': '2025-09-10 02:42:52.323899', 'step': 7315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:52.382070', 'step': 7315, 'epoch': 1} {'type': 'loss', 'content': 0.11358559876680374, 'timestamp': '2025-09-10 02:42:52.388142', 'step': 7316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:52.455211', 'step': 7316, 'epoch': 1} {'type': 'loss', 'content': 0.18455815315246582, 'timestamp': '2025-09-10 02:42:52.458075', 'step': 7317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:52.512983', 'step': 7317, 'epoch': 1} {'type': 'loss', 'content': 0.20572416484355927, 'timestamp': '2025-09-10 02:42:52.515238', 'step': 7318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:52.571112', 'step': 7318, 'epoch': 1} {'type': 'loss', 'content': 0.12339914590120316, 'timestamp': '2025-09-10 02:42:52.573300', 'step': 7319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:52.629346', 'step': 7319, 'epoch': 1} {'type': 'loss', 'content': 0.1885271817445755, 'timestamp': '2025-09-10 02:42:52.637611', 'step': 7320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:52.693058', 'step': 7320, 'epoch': 1} {'type': 'loss', 'content': 0.24839313328266144, 'timestamp': '2025-09-10 02:42:52.695179', 'step': 7321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:52.751111', 'step': 7321, 'epoch': 1} {'type': 'loss', 'content': 0.18966363370418549, 'timestamp': '2025-09-10 02:42:52.754460', 'step': 7322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:52.812263', 'step': 7322, 'epoch': 1} {'type': 'loss', 'content': 0.15521366894245148, 'timestamp': '2025-09-10 02:42:52.818484', 'step': 7323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:52.887380', 'step': 7323, 'epoch': 1} {'type': 'loss', 'content': 0.10921614617109299, 'timestamp': '2025-09-10 02:42:52.893374', 'step': 7324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:52.952324', 'step': 7324, 'epoch': 1} {'type': 'loss', 'content': 0.12414297461509705, 'timestamp': '2025-09-10 02:42:52.954472', 'step': 7325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:53.008447', 'step': 7325, 'epoch': 1} {'type': 'loss', 'content': 0.19322805106639862, 'timestamp': '2025-09-10 02:42:53.010595', 'step': 7326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:53.069445', 'step': 7326, 'epoch': 1} {'type': 'loss', 'content': 0.11068763583898544, 'timestamp': '2025-09-10 02:42:53.072228', 'step': 7327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:53.134676', 'step': 7327, 'epoch': 1} {'type': 'loss', 'content': 0.15377475321292877, 'timestamp': '2025-09-10 02:42:53.141235', 'step': 7328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:53.214461', 'step': 7328, 'epoch': 1} {'type': 'loss', 'content': 0.12865573167800903, 'timestamp': '2025-09-10 02:42:53.219618', 'step': 7329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:53.297814', 'step': 7329, 'epoch': 1} {'type': 'loss', 'content': 0.19509078562259674, 'timestamp': '2025-09-10 02:42:53.300132', 'step': 7330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:53.364384', 'step': 7330, 'epoch': 1} {'type': 'loss', 'content': 0.12134206295013428, 'timestamp': '2025-09-10 02:42:53.370501', 'step': 7331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:53.441859', 'step': 7331, 'epoch': 1} {'type': 'loss', 'content': 0.17206840217113495, 'timestamp': '2025-09-10 02:42:53.450012', 'step': 7332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:53.537949', 'step': 7332, 'epoch': 1} {'type': 'loss', 'content': 0.12221724539995193, 'timestamp': '2025-09-10 02:42:53.540422', 'step': 7333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:53.602675', 'step': 7333, 'epoch': 1} {'type': 'loss', 'content': 0.1050797775387764, 'timestamp': '2025-09-10 02:42:53.604934', 'step': 7334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:53.660075', 'step': 7334, 'epoch': 1} {'type': 'loss', 'content': 0.09494299441576004, 'timestamp': '2025-09-10 02:42:53.662210', 'step': 7335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:53.722000', 'step': 7335, 'epoch': 1} {'type': 'loss', 'content': 0.1821320652961731, 'timestamp': '2025-09-10 02:42:53.728126', 'step': 7336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:53.783239', 'step': 7336, 'epoch': 1} {'type': 'loss', 'content': 0.21772775053977966, 'timestamp': '2025-09-10 02:42:53.785442', 'step': 7337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:53.842625', 'step': 7337, 'epoch': 1} {'type': 'loss', 'content': 0.1284889429807663, 'timestamp': '2025-09-10 02:42:53.850582', 'step': 7338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:53.907142', 'step': 7338, 'epoch': 1} {'type': 'loss', 'content': 0.2021784782409668, 'timestamp': '2025-09-10 02:42:53.909413', 'step': 7339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:53.962926', 'step': 7339, 'epoch': 1} {'type': 'loss', 'content': 0.18592512607574463, 'timestamp': '2025-09-10 02:42:53.969943', 'step': 7340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:54.033728', 'step': 7340, 'epoch': 1} {'type': 'loss', 'content': 0.2063453048467636, 'timestamp': '2025-09-10 02:42:54.039205', 'step': 7341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:54.110261', 'step': 7341, 'epoch': 1} {'type': 'loss', 'content': 0.17140990495681763, 'timestamp': '2025-09-10 02:42:54.113194', 'step': 7342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:54.172555', 'step': 7342, 'epoch': 1} {'type': 'loss', 'content': 0.12673164904117584, 'timestamp': '2025-09-10 02:42:54.174709', 'step': 7343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:54.230223', 'step': 7343, 'epoch': 1} {'type': 'loss', 'content': 0.27321070432662964, 'timestamp': '2025-09-10 02:42:54.236470', 'step': 7344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:54.289908', 'step': 7344, 'epoch': 1} {'type': 'loss', 'content': 0.13225635886192322, 'timestamp': '2025-09-10 02:42:54.293306', 'step': 7345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:54.355291', 'step': 7345, 'epoch': 1} {'type': 'loss', 'content': 0.1480656862258911, 'timestamp': '2025-09-10 02:42:54.357679', 'step': 7346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:54.446750', 'step': 7346, 'epoch': 1} {'type': 'loss', 'content': 0.13527503609657288, 'timestamp': '2025-09-10 02:42:54.449199', 'step': 7347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:54.535211', 'step': 7347, 'epoch': 1} {'type': 'loss', 'content': 0.14524705708026886, 'timestamp': '2025-09-10 02:42:54.548202', 'step': 7348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:54.608969', 'step': 7348, 'epoch': 1} {'type': 'loss', 'content': 0.0719866082072258, 'timestamp': '2025-09-10 02:42:54.614093', 'step': 7349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:54.694686', 'step': 7349, 'epoch': 1} {'type': 'loss', 'content': 0.10946489870548248, 'timestamp': '2025-09-10 02:42:54.698650', 'step': 7350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:54.755176', 'step': 7350, 'epoch': 1} {'type': 'loss', 'content': 0.1278868168592453, 'timestamp': '2025-09-10 02:42:54.757313', 'step': 7351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:54.819903', 'step': 7351, 'epoch': 1} {'type': 'loss', 'content': 0.1682923585176468, 'timestamp': '2025-09-10 02:42:54.826380', 'step': 7352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:54.880739', 'step': 7352, 'epoch': 1} {'type': 'loss', 'content': 0.1637570858001709, 'timestamp': '2025-09-10 02:42:54.883068', 'step': 7353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:54.939674', 'step': 7353, 'epoch': 1} {'type': 'loss', 'content': 0.13280099630355835, 'timestamp': '2025-09-10 02:42:54.948000', 'step': 7354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:55.018869', 'step': 7354, 'epoch': 1} {'type': 'loss', 'content': 0.20308144390583038, 'timestamp': '2025-09-10 02:42:55.021392', 'step': 7355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:55.104036', 'step': 7355, 'epoch': 1} {'type': 'loss', 'content': 0.10573503375053406, 'timestamp': '2025-09-10 02:42:55.115940', 'step': 7356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:55.208963', 'step': 7356, 'epoch': 1} {'type': 'loss', 'content': 0.19144272804260254, 'timestamp': '2025-09-10 02:42:55.211353', 'step': 7357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:55.297888', 'step': 7357, 'epoch': 1} {'type': 'loss', 'content': 0.09592780470848083, 'timestamp': '2025-09-10 02:42:55.300163', 'step': 7358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:55.356446', 'step': 7358, 'epoch': 1} {'type': 'loss', 'content': 0.15803934633731842, 'timestamp': '2025-09-10 02:42:55.358670', 'step': 7359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:55.420500', 'step': 7359, 'epoch': 1} {'type': 'loss', 'content': 0.12215384840965271, 'timestamp': '2025-09-10 02:42:55.426706', 'step': 7360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:55.485641', 'step': 7360, 'epoch': 1} {'type': 'loss', 'content': 0.18583500385284424, 'timestamp': '2025-09-10 02:42:55.488437', 'step': 7361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:55.547633', 'step': 7361, 'epoch': 1} {'type': 'loss', 'content': 0.09790153056383133, 'timestamp': '2025-09-10 02:42:55.549855', 'step': 7362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:55.609025', 'step': 7362, 'epoch': 1} {'type': 'loss', 'content': 0.08711663633584976, 'timestamp': '2025-09-10 02:42:55.611199', 'step': 7363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:55.677608', 'step': 7363, 'epoch': 1} {'type': 'loss', 'content': 0.25396648049354553, 'timestamp': '2025-09-10 02:42:55.683815', 'step': 7364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:55.753598', 'step': 7364, 'epoch': 1} {'type': 'loss', 'content': 0.22075510025024414, 'timestamp': '2025-09-10 02:42:55.756060', 'step': 7365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:55.824275', 'step': 7365, 'epoch': 1} {'type': 'loss', 'content': 0.10134576261043549, 'timestamp': '2025-09-10 02:42:55.829162', 'step': 7366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:55.911528', 'step': 7366, 'epoch': 1} {'type': 'loss', 'content': 0.15753555297851562, 'timestamp': '2025-09-10 02:42:55.915194', 'step': 7367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:55.995755', 'step': 7367, 'epoch': 1} {'type': 'loss', 'content': 0.2078460156917572, 'timestamp': '2025-09-10 02:42:56.002321', 'step': 7368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:56.067132', 'step': 7368, 'epoch': 1} {'type': 'loss', 'content': 0.16552302241325378, 'timestamp': '2025-09-10 02:42:56.069428', 'step': 7369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:56.136404', 'step': 7369, 'epoch': 1} {'type': 'loss', 'content': 0.12800510227680206, 'timestamp': '2025-09-10 02:42:56.139664', 'step': 7370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:56.215195', 'step': 7370, 'epoch': 1} {'type': 'loss', 'content': 0.13524308800697327, 'timestamp': '2025-09-10 02:42:56.218721', 'step': 7371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:56.286034', 'step': 7371, 'epoch': 1} {'type': 'loss', 'content': 0.15021461248397827, 'timestamp': '2025-09-10 02:42:56.292499', 'step': 7372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:56.356063', 'step': 7372, 'epoch': 1} {'type': 'loss', 'content': 0.17019209265708923, 'timestamp': '2025-09-10 02:42:56.360519', 'step': 7373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:56.426673', 'step': 7373, 'epoch': 1} {'type': 'loss', 'content': 0.1865037977695465, 'timestamp': '2025-09-10 02:42:56.429618', 'step': 7374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:56.498119', 'step': 7374, 'epoch': 1} {'type': 'loss', 'content': 0.13972467184066772, 'timestamp': '2025-09-10 02:42:56.500402', 'step': 7375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:56.568662', 'step': 7375, 'epoch': 1} {'type': 'loss', 'content': 0.20509973168373108, 'timestamp': '2025-09-10 02:42:56.574989', 'step': 7376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:56.636508', 'step': 7376, 'epoch': 1} {'type': 'loss', 'content': 0.19442981481552124, 'timestamp': '2025-09-10 02:42:56.639542', 'step': 7377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:56.703749', 'step': 7377, 'epoch': 1} {'type': 'loss', 'content': 0.15127871930599213, 'timestamp': '2025-09-10 02:42:56.709570', 'step': 7378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:56.779119', 'step': 7378, 'epoch': 1} {'type': 'loss', 'content': 0.1392962485551834, 'timestamp': '2025-09-10 02:42:56.782476', 'step': 7379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:56.859511', 'step': 7379, 'epoch': 1} {'type': 'loss', 'content': 0.1413235068321228, 'timestamp': '2025-09-10 02:42:56.865790', 'step': 7380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:56.949464', 'step': 7380, 'epoch': 1} {'type': 'loss', 'content': 0.19906166195869446, 'timestamp': '2025-09-10 02:42:56.953048', 'step': 7381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:57.013674', 'step': 7381, 'epoch': 1} {'type': 'loss', 'content': 0.18856799602508545, 'timestamp': '2025-09-10 02:42:57.018874', 'step': 7382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:57.088653', 'step': 7382, 'epoch': 1} {'type': 'loss', 'content': 0.17066358029842377, 'timestamp': '2025-09-10 02:42:57.093016', 'step': 7383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:57.163316', 'step': 7383, 'epoch': 1} {'type': 'loss', 'content': 0.13549299538135529, 'timestamp': '2025-09-10 02:42:57.171530', 'step': 7384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:57.234826', 'step': 7384, 'epoch': 1} {'type': 'loss', 'content': 0.23774752020835876, 'timestamp': '2025-09-10 02:42:57.238741', 'step': 7385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:57.309629', 'step': 7385, 'epoch': 1} {'type': 'loss', 'content': 0.1207268163561821, 'timestamp': '2025-09-10 02:42:57.312143', 'step': 7386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:57.391614', 'step': 7386, 'epoch': 1} {'type': 'loss', 'content': 0.07975294440984726, 'timestamp': '2025-09-10 02:42:57.396691', 'step': 7387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:57.471560', 'step': 7387, 'epoch': 1} {'type': 'loss', 'content': 0.11165216565132141, 'timestamp': '2025-09-10 02:42:57.477703', 'step': 7388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:57.538016', 'step': 7388, 'epoch': 1} {'type': 'loss', 'content': 0.25045090913772583, 'timestamp': '2025-09-10 02:42:57.540366', 'step': 7389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:57.606806', 'step': 7389, 'epoch': 1} {'type': 'loss', 'content': 0.15435239672660828, 'timestamp': '2025-09-10 02:42:57.610594', 'step': 7390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:57.715904', 'step': 7390, 'epoch': 1} {'type': 'loss', 'content': 0.23970596492290497, 'timestamp': '2025-09-10 02:42:57.718070', 'step': 7391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:57.817673', 'step': 7391, 'epoch': 1} {'type': 'loss', 'content': 0.17210114002227783, 'timestamp': '2025-09-10 02:42:57.834200', 'step': 7392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:57.911800', 'step': 7392, 'epoch': 1} {'type': 'loss', 'content': 0.1997833102941513, 'timestamp': '2025-09-10 02:42:57.913998', 'step': 7393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:57.990525', 'step': 7393, 'epoch': 1} {'type': 'loss', 'content': 0.1629970520734787, 'timestamp': '2025-09-10 02:42:57.993049', 'step': 7394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:58.076870', 'step': 7394, 'epoch': 1} {'type': 'loss', 'content': 0.14012189209461212, 'timestamp': '2025-09-10 02:42:58.079336', 'step': 7395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:58.139340', 'step': 7395, 'epoch': 1} {'type': 'loss', 'content': 0.1272539645433426, 'timestamp': '2025-09-10 02:42:58.149542', 'step': 7396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:58.228497', 'step': 7396, 'epoch': 1} {'type': 'loss', 'content': 0.1294233649969101, 'timestamp': '2025-09-10 02:42:58.232690', 'step': 7397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:58.328320', 'step': 7397, 'epoch': 1} {'type': 'loss', 'content': 0.12852968275547028, 'timestamp': '2025-09-10 02:42:58.330628', 'step': 7398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:58.397947', 'step': 7398, 'epoch': 1} {'type': 'loss', 'content': 0.13613447546958923, 'timestamp': '2025-09-10 02:42:58.400287', 'step': 7399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:58.494142', 'step': 7399, 'epoch': 1} {'type': 'loss', 'content': 0.13341817259788513, 'timestamp': '2025-09-10 02:42:58.500681', 'step': 7400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:58.589918', 'step': 7400, 'epoch': 1} {'type': 'loss', 'content': 0.24624262750148773, 'timestamp': '2025-09-10 02:42:58.597882', 'step': 7401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:58.663778', 'step': 7401, 'epoch': 1} {'type': 'loss', 'content': 0.17442159354686737, 'timestamp': '2025-09-10 02:42:58.665965', 'step': 7402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:58.747123', 'step': 7402, 'epoch': 1} {'type': 'loss', 'content': 0.16449348628520966, 'timestamp': '2025-09-10 02:42:58.749429', 'step': 7403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:58.819971', 'step': 7403, 'epoch': 1} {'type': 'loss', 'content': 0.17181213200092316, 'timestamp': '2025-09-10 02:42:58.826122', 'step': 7404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:58.894744', 'step': 7404, 'epoch': 1} {'type': 'loss', 'content': 0.0987212136387825, 'timestamp': '2025-09-10 02:42:58.897471', 'step': 7405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:58.961399', 'step': 7405, 'epoch': 1} {'type': 'loss', 'content': 0.1577984243631363, 'timestamp': '2025-09-10 02:42:58.963587', 'step': 7406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:59.023318', 'step': 7406, 'epoch': 1} {'type': 'loss', 'content': 0.19635769724845886, 'timestamp': '2025-09-10 02:42:59.025546', 'step': 7407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:59.089630', 'step': 7407, 'epoch': 1} {'type': 'loss', 'content': 0.3210546374320984, 'timestamp': '2025-09-10 02:42:59.096183', 'step': 7408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:59.179001', 'step': 7408, 'epoch': 1} {'type': 'loss', 'content': 0.22600753605365753, 'timestamp': '2025-09-10 02:42:59.181314', 'step': 7409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:59.270393', 'step': 7409, 'epoch': 1} {'type': 'loss', 'content': 0.12130056321620941, 'timestamp': '2025-09-10 02:42:59.273445', 'step': 7410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:59.339611', 'step': 7410, 'epoch': 1} {'type': 'loss', 'content': 0.08947626501321793, 'timestamp': '2025-09-10 02:42:59.341813', 'step': 7411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:59.406029', 'step': 7411, 'epoch': 1} {'type': 'loss', 'content': 0.11117926985025406, 'timestamp': '2025-09-10 02:42:59.412099', 'step': 7412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:59.491101', 'step': 7412, 'epoch': 1} {'type': 'loss', 'content': 0.21095767617225647, 'timestamp': '2025-09-10 02:42:59.493522', 'step': 7413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:59.569208', 'step': 7413, 'epoch': 1} {'type': 'loss', 'content': 0.06662828475236893, 'timestamp': '2025-09-10 02:42:59.571581', 'step': 7414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:59.634626', 'step': 7414, 'epoch': 1} {'type': 'loss', 'content': 0.06679535657167435, 'timestamp': '2025-09-10 02:42:59.636805', 'step': 7415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:59.715830', 'step': 7415, 'epoch': 1} {'type': 'loss', 'content': 0.12376607209444046, 'timestamp': '2025-09-10 02:42:59.721964', 'step': 7416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:42:59.777940', 'step': 7416, 'epoch': 1} {'type': 'loss', 'content': 0.2682870030403137, 'timestamp': '2025-09-10 02:42:59.780132', 'step': 7417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:42:59.839854', 'step': 7417, 'epoch': 1} {'type': 'loss', 'content': 0.07061216980218887, 'timestamp': '2025-09-10 02:42:59.842069', 'step': 7418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:42:59.901824', 'step': 7418, 'epoch': 1} {'type': 'loss', 'content': 0.1786983460187912, 'timestamp': '2025-09-10 02:42:59.904070', 'step': 7419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:59.968386', 'step': 7419, 'epoch': 1} {'type': 'loss', 'content': 0.18551115691661835, 'timestamp': '2025-09-10 02:42:59.974567', 'step': 7420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:00.065736', 'step': 7420, 'epoch': 1} {'type': 'loss', 'content': 0.07730972021818161, 'timestamp': '2025-09-10 02:43:00.070720', 'step': 7421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:00.142845', 'step': 7421, 'epoch': 1} {'type': 'loss', 'content': 0.17117206752300262, 'timestamp': '2025-09-10 02:43:00.145121', 'step': 7422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:00.215825', 'step': 7422, 'epoch': 1} {'type': 'loss', 'content': 0.1554817259311676, 'timestamp': '2025-09-10 02:43:00.218162', 'step': 7423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:00.323686', 'step': 7423, 'epoch': 1} {'type': 'loss', 'content': 0.1112903505563736, 'timestamp': '2025-09-10 02:43:00.329856', 'step': 7424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:00.388696', 'step': 7424, 'epoch': 1} {'type': 'loss', 'content': 0.2600846588611603, 'timestamp': '2025-09-10 02:43:00.390913', 'step': 7425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:00.463772', 'step': 7425, 'epoch': 1} {'type': 'loss', 'content': 0.11770326644182205, 'timestamp': '2025-09-10 02:43:00.465981', 'step': 7426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:00.528012', 'step': 7426, 'epoch': 1} {'type': 'loss', 'content': 0.11110739409923553, 'timestamp': '2025-09-10 02:43:00.530309', 'step': 7427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:00.593787', 'step': 7427, 'epoch': 1} {'type': 'loss', 'content': 0.07960714399814606, 'timestamp': '2025-09-10 02:43:00.600071', 'step': 7428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:00.655177', 'step': 7428, 'epoch': 1} {'type': 'loss', 'content': 0.17554353177547455, 'timestamp': '2025-09-10 02:43:00.657544', 'step': 7429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:00.742652', 'step': 7429, 'epoch': 1} {'type': 'loss', 'content': 0.1260630190372467, 'timestamp': '2025-09-10 02:43:00.744913', 'step': 7430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:00.825507', 'step': 7430, 'epoch': 1} {'type': 'loss', 'content': 0.21232233941555023, 'timestamp': '2025-09-10 02:43:00.827741', 'step': 7431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:00.910566', 'step': 7431, 'epoch': 1} {'type': 'loss', 'content': 0.18431580066680908, 'timestamp': '2025-09-10 02:43:00.916883', 'step': 7432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:00.979345', 'step': 7432, 'epoch': 1} {'type': 'loss', 'content': 0.14665141701698303, 'timestamp': '2025-09-10 02:43:00.981601', 'step': 7433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:01.039761', 'step': 7433, 'epoch': 1} {'type': 'loss', 'content': 0.15674395859241486, 'timestamp': '2025-09-10 02:43:01.041876', 'step': 7434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:01.102296', 'step': 7434, 'epoch': 1} {'type': 'loss', 'content': 0.151095449924469, 'timestamp': '2025-09-10 02:43:01.104519', 'step': 7435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:01.162839', 'step': 7435, 'epoch': 1} {'type': 'loss', 'content': 0.12176121771335602, 'timestamp': '2025-09-10 02:43:01.169256', 'step': 7436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:01.234243', 'step': 7436, 'epoch': 1} {'type': 'loss', 'content': 0.15950646996498108, 'timestamp': '2025-09-10 02:43:01.237020', 'step': 7437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:01.304915', 'step': 7437, 'epoch': 1} {'type': 'loss', 'content': 0.07678782194852829, 'timestamp': '2025-09-10 02:43:01.312508', 'step': 7438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:01.388239', 'step': 7438, 'epoch': 1} {'type': 'loss', 'content': 0.14475585520267487, 'timestamp': '2025-09-10 02:43:01.390492', 'step': 7439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:01.450717', 'step': 7439, 'epoch': 1} {'type': 'loss', 'content': 0.20028531551361084, 'timestamp': '2025-09-10 02:43:01.456687', 'step': 7440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:01.515051', 'step': 7440, 'epoch': 1} {'type': 'loss', 'content': 0.1512461155653, 'timestamp': '2025-09-10 02:43:01.517294', 'step': 7441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:01.579289', 'step': 7441, 'epoch': 1} {'type': 'loss', 'content': 0.14148177206516266, 'timestamp': '2025-09-10 02:43:01.581773', 'step': 7442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:01.641628', 'step': 7442, 'epoch': 1} {'type': 'loss', 'content': 0.2533884644508362, 'timestamp': '2025-09-10 02:43:01.643789', 'step': 7443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:01.718664', 'step': 7443, 'epoch': 1} {'type': 'loss', 'content': 0.0792384147644043, 'timestamp': '2025-09-10 02:43:01.725059', 'step': 7444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:01.821138', 'step': 7444, 'epoch': 1} {'type': 'loss', 'content': 0.16474485397338867, 'timestamp': '2025-09-10 02:43:01.823477', 'step': 7445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:01.899307', 'step': 7445, 'epoch': 1} {'type': 'loss', 'content': 0.13386274874210358, 'timestamp': '2025-09-10 02:43:01.901493', 'step': 7446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:01.965912', 'step': 7446, 'epoch': 1} {'type': 'loss', 'content': 0.23760035634040833, 'timestamp': '2025-09-10 02:43:01.968030', 'step': 7447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:02.032518', 'step': 7447, 'epoch': 1} {'type': 'loss', 'content': 0.1766597479581833, 'timestamp': '2025-09-10 02:43:02.038599', 'step': 7448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:02.097601', 'step': 7448, 'epoch': 1} {'type': 'loss', 'content': 0.12283706665039062, 'timestamp': '2025-09-10 02:43:02.099648', 'step': 7449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:02.170257', 'step': 7449, 'epoch': 1} {'type': 'loss', 'content': 0.147696390748024, 'timestamp': '2025-09-10 02:43:02.176362', 'step': 7450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:02.238036', 'step': 7450, 'epoch': 1} {'type': 'loss', 'content': 0.18884262442588806, 'timestamp': '2025-09-10 02:43:02.240916', 'step': 7451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:02.298305', 'step': 7451, 'epoch': 1} {'type': 'loss', 'content': 0.13042573630809784, 'timestamp': '2025-09-10 02:43:02.306717', 'step': 7452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:02.363000', 'step': 7452, 'epoch': 1} {'type': 'loss', 'content': 0.10628234595060349, 'timestamp': '2025-09-10 02:43:02.364999', 'step': 7453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:02.419462', 'step': 7453, 'epoch': 1} {'type': 'loss', 'content': 0.06970425695180893, 'timestamp': '2025-09-10 02:43:02.421393', 'step': 7454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:02.476376', 'step': 7454, 'epoch': 1} {'type': 'loss', 'content': 0.40259814262390137, 'timestamp': '2025-09-10 02:43:02.478614', 'step': 7455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:02.533393', 'step': 7455, 'epoch': 1} {'type': 'loss', 'content': 0.14242859184741974, 'timestamp': '2025-09-10 02:43:02.539302', 'step': 7456, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:43:15.774005', 'step': 7456, 'epoch': 1} {'type': 'pplx', 'content': 12433.038653892605, 'timestamp': '2025-09-10 02:43:15.777274', 'step': 7456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:15.833326', 'step': 7456, 'epoch': 1} {'type': 'loss', 'content': 0.16304349899291992, 'timestamp': '2025-09-10 02:43:15.835319', 'step': 7457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:15.892412', 'step': 7457, 'epoch': 1} {'type': 'loss', 'content': 0.11812696605920792, 'timestamp': '2025-09-10 02:43:15.894366', 'step': 7458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:15.954920', 'step': 7458, 'epoch': 1} {'type': 'loss', 'content': 0.07195363193750381, 'timestamp': '2025-09-10 02:43:15.956911', 'step': 7459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 208], 'flops': 1040006410960.0}, 'timestamp': '2025-09-10 02:43:16.052677', 'step': 7459, 'epoch': 1} {'type': 'loss', 'content': 0.34941989183425903, 'timestamp': '2025-09-10 02:43:16.058432', 'step': 7460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:16.145740', 'step': 7460, 'epoch': 2} {'type': 'loss', 'content': 0.06107894703745842, 'timestamp': '2025-09-10 02:43:16.148031', 'step': 7461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:16.223190', 'step': 7461, 'epoch': 2} {'type': 'loss', 'content': 0.10690769553184509, 'timestamp': '2025-09-10 02:43:16.225340', 'step': 7462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:16.313930', 'step': 7462, 'epoch': 2} {'type': 'loss', 'content': 0.14127184450626373, 'timestamp': '2025-09-10 02:43:16.315957', 'step': 7463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:16.387422', 'step': 7463, 'epoch': 2} {'type': 'loss', 'content': 0.12532490491867065, 'timestamp': '2025-09-10 02:43:16.393514', 'step': 7464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:16.456114', 'step': 7464, 'epoch': 2} {'type': 'loss', 'content': 0.20171955227851868, 'timestamp': '2025-09-10 02:43:16.458116', 'step': 7465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:16.529432', 'step': 7465, 'epoch': 2} {'type': 'loss', 'content': 0.09318245202302933, 'timestamp': '2025-09-10 02:43:16.531234', 'step': 7466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:16.594524', 'step': 7466, 'epoch': 2} {'type': 'loss', 'content': 0.1699184775352478, 'timestamp': '2025-09-10 02:43:16.596589', 'step': 7467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:16.657299', 'step': 7467, 'epoch': 2} {'type': 'loss', 'content': 0.09467007219791412, 'timestamp': '2025-09-10 02:43:16.663310', 'step': 7468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:16.743759', 'step': 7468, 'epoch': 2} {'type': 'loss', 'content': 0.14817580580711365, 'timestamp': '2025-09-10 02:43:16.745744', 'step': 7469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:16.818125', 'step': 7469, 'epoch': 2} {'type': 'loss', 'content': 0.14356614649295807, 'timestamp': '2025-09-10 02:43:16.820193', 'step': 7470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:16.884307', 'step': 7470, 'epoch': 2} {'type': 'loss', 'content': 0.1030631735920906, 'timestamp': '2025-09-10 02:43:16.886334', 'step': 7471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:16.941304', 'step': 7471, 'epoch': 2} {'type': 'loss', 'content': 0.1582149714231491, 'timestamp': '2025-09-10 02:43:16.947379', 'step': 7472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:17.004493', 'step': 7472, 'epoch': 2} {'type': 'loss', 'content': 0.1251974254846573, 'timestamp': '2025-09-10 02:43:17.006414', 'step': 7473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:17.063785', 'step': 7473, 'epoch': 2} {'type': 'loss', 'content': 0.11596167832612991, 'timestamp': '2025-09-10 02:43:17.065805', 'step': 7474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:17.129538', 'step': 7474, 'epoch': 2} {'type': 'loss', 'content': 0.13509780168533325, 'timestamp': '2025-09-10 02:43:17.131561', 'step': 7475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:17.186233', 'step': 7475, 'epoch': 2} {'type': 'loss', 'content': 0.20759692788124084, 'timestamp': '2025-09-10 02:43:17.192162', 'step': 7476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:17.247976', 'step': 7476, 'epoch': 2} {'type': 'loss', 'content': 0.09807084500789642, 'timestamp': '2025-09-10 02:43:17.250003', 'step': 7477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:17.304965', 'step': 7477, 'epoch': 2} {'type': 'loss', 'content': 0.19302518665790558, 'timestamp': '2025-09-10 02:43:17.306924', 'step': 7478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:17.362406', 'step': 7478, 'epoch': 2} {'type': 'loss', 'content': 0.22870950400829315, 'timestamp': '2025-09-10 02:43:17.364390', 'step': 7479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:17.420771', 'step': 7479, 'epoch': 2} {'type': 'loss', 'content': 0.08275562524795532, 'timestamp': '2025-09-10 02:43:17.427014', 'step': 7480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:17.480311', 'step': 7480, 'epoch': 2} {'type': 'loss', 'content': 0.09192582964897156, 'timestamp': '2025-09-10 02:43:17.482434', 'step': 7481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:17.535067', 'step': 7481, 'epoch': 2} {'type': 'loss', 'content': 0.1566193550825119, 'timestamp': '2025-09-10 02:43:17.536972', 'step': 7482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:17.591159', 'step': 7482, 'epoch': 2} {'type': 'loss', 'content': 0.07834386825561523, 'timestamp': '2025-09-10 02:43:17.593362', 'step': 7483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:17.646013', 'step': 7483, 'epoch': 2} {'type': 'loss', 'content': 0.17240457236766815, 'timestamp': '2025-09-10 02:43:17.651800', 'step': 7484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:17.703634', 'step': 7484, 'epoch': 2} {'type': 'loss', 'content': 0.07421993464231491, 'timestamp': '2025-09-10 02:43:17.705611', 'step': 7485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:17.758343', 'step': 7485, 'epoch': 2} {'type': 'loss', 'content': 0.1399020403623581, 'timestamp': '2025-09-10 02:43:17.760371', 'step': 7486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:17.813534', 'step': 7486, 'epoch': 2} {'type': 'loss', 'content': 0.24142234027385712, 'timestamp': '2025-09-10 02:43:17.815475', 'step': 7487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:17.868645', 'step': 7487, 'epoch': 2} {'type': 'loss', 'content': 0.12805671989917755, 'timestamp': '2025-09-10 02:43:17.874518', 'step': 7488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:17.927497', 'step': 7488, 'epoch': 2} {'type': 'loss', 'content': 0.13027583062648773, 'timestamp': '2025-09-10 02:43:17.929424', 'step': 7489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:17.982021', 'step': 7489, 'epoch': 2} {'type': 'loss', 'content': 0.07547225058078766, 'timestamp': '2025-09-10 02:43:17.983995', 'step': 7490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:18.036281', 'step': 7490, 'epoch': 2} {'type': 'loss', 'content': 0.17083711922168732, 'timestamp': '2025-09-10 02:43:18.038363', 'step': 7491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:18.090416', 'step': 7491, 'epoch': 2} {'type': 'loss', 'content': 0.08730930835008621, 'timestamp': '2025-09-10 02:43:18.096214', 'step': 7492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:18.150609', 'step': 7492, 'epoch': 2} {'type': 'loss', 'content': 0.23432859778404236, 'timestamp': '2025-09-10 02:43:18.152542', 'step': 7493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:18.204693', 'step': 7493, 'epoch': 2} {'type': 'loss', 'content': 0.13950374722480774, 'timestamp': '2025-09-10 02:43:18.206960', 'step': 7494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:18.259768', 'step': 7494, 'epoch': 2} {'type': 'loss', 'content': 0.23220521211624146, 'timestamp': '2025-09-10 02:43:18.261801', 'step': 7495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:18.315628', 'step': 7495, 'epoch': 2} {'type': 'loss', 'content': 0.08045368641614914, 'timestamp': '2025-09-10 02:43:18.321519', 'step': 7496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:18.373022', 'step': 7496, 'epoch': 2} {'type': 'loss', 'content': 0.17365549504756927, 'timestamp': '2025-09-10 02:43:18.374962', 'step': 7497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:18.428061', 'step': 7497, 'epoch': 2} {'type': 'loss', 'content': 0.11675482988357544, 'timestamp': '2025-09-10 02:43:18.430012', 'step': 7498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:18.482748', 'step': 7498, 'epoch': 2} {'type': 'loss', 'content': 0.18330630660057068, 'timestamp': '2025-09-10 02:43:18.484890', 'step': 7499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:18.538393', 'step': 7499, 'epoch': 2} {'type': 'loss', 'content': 0.14740495383739471, 'timestamp': '2025-09-10 02:43:18.544219', 'step': 7500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 7500', 'timestamp': '2025-09-10 02:43:18.927119', 'step': 7500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:18.982350', 'step': 7500, 'epoch': 2} {'type': 'loss', 'content': 0.1218903437256813, 'timestamp': '2025-09-10 02:43:18.984321', 'step': 7501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:19.037976', 'step': 7501, 'epoch': 2} {'type': 'loss', 'content': 0.1310662478208542, 'timestamp': '2025-09-10 02:43:19.039927', 'step': 7502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:19.092552', 'step': 7502, 'epoch': 2} {'type': 'loss', 'content': 0.13608446717262268, 'timestamp': '2025-09-10 02:43:19.094532', 'step': 7503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:19.147836', 'step': 7503, 'epoch': 2} {'type': 'loss', 'content': 0.15804354846477509, 'timestamp': '2025-09-10 02:43:19.153742', 'step': 7504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:19.206606', 'step': 7504, 'epoch': 2} {'type': 'loss', 'content': 0.19998912513256073, 'timestamp': '2025-09-10 02:43:19.208652', 'step': 7505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:19.261711', 'step': 7505, 'epoch': 2} {'type': 'loss', 'content': 0.16480526328086853, 'timestamp': '2025-09-10 02:43:19.263682', 'step': 7506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:19.317177', 'step': 7506, 'epoch': 2} {'type': 'loss', 'content': 0.1354464441537857, 'timestamp': '2025-09-10 02:43:19.319095', 'step': 7507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:19.372213', 'step': 7507, 'epoch': 2} {'type': 'loss', 'content': 0.22275462746620178, 'timestamp': '2025-09-10 02:43:19.378206', 'step': 7508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:19.431553', 'step': 7508, 'epoch': 2} {'type': 'loss', 'content': 0.17646296322345734, 'timestamp': '2025-09-10 02:43:19.433757', 'step': 7509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:19.486692', 'step': 7509, 'epoch': 2} {'type': 'loss', 'content': 0.09656516462564468, 'timestamp': '2025-09-10 02:43:19.488889', 'step': 7510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:19.542429', 'step': 7510, 'epoch': 2} {'type': 'loss', 'content': 0.17312860488891602, 'timestamp': '2025-09-10 02:43:19.545523', 'step': 7511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:19.598884', 'step': 7511, 'epoch': 2} {'type': 'loss', 'content': 0.22390185296535492, 'timestamp': '2025-09-10 02:43:19.605085', 'step': 7512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:19.663840', 'step': 7512, 'epoch': 2} {'type': 'loss', 'content': 0.1444714069366455, 'timestamp': '2025-09-10 02:43:19.666094', 'step': 7513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:19.719210', 'step': 7513, 'epoch': 2} {'type': 'loss', 'content': 0.1105262041091919, 'timestamp': '2025-09-10 02:43:19.721185', 'step': 7514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:19.775141', 'step': 7514, 'epoch': 2} {'type': 'loss', 'content': 0.14765428006649017, 'timestamp': '2025-09-10 02:43:19.777086', 'step': 7515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:19.830877', 'step': 7515, 'epoch': 2} {'type': 'loss', 'content': 0.09185180813074112, 'timestamp': '2025-09-10 02:43:19.836679', 'step': 7516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:19.889851', 'step': 7516, 'epoch': 2} {'type': 'loss', 'content': 0.10630843788385391, 'timestamp': '2025-09-10 02:43:19.892377', 'step': 7517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:19.945766', 'step': 7517, 'epoch': 2} {'type': 'loss', 'content': 0.16028665006160736, 'timestamp': '2025-09-10 02:43:19.947882', 'step': 7518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:20.001400', 'step': 7518, 'epoch': 2} {'type': 'loss', 'content': 0.1878216713666916, 'timestamp': '2025-09-10 02:43:20.003398', 'step': 7519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:20.056238', 'step': 7519, 'epoch': 2} {'type': 'loss', 'content': 0.16931086778640747, 'timestamp': '2025-09-10 02:43:20.063303', 'step': 7520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:20.115867', 'step': 7520, 'epoch': 2} {'type': 'loss', 'content': 0.1352955400943756, 'timestamp': '2025-09-10 02:43:20.117902', 'step': 7521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:20.172502', 'step': 7521, 'epoch': 2} {'type': 'loss', 'content': 0.11111361533403397, 'timestamp': '2025-09-10 02:43:20.174481', 'step': 7522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:20.227321', 'step': 7522, 'epoch': 2} {'type': 'loss', 'content': 0.21946872770786285, 'timestamp': '2025-09-10 02:43:20.229408', 'step': 7523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:20.283663', 'step': 7523, 'epoch': 2} {'type': 'loss', 'content': 0.10486914217472076, 'timestamp': '2025-09-10 02:43:20.294584', 'step': 7524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:20.349769', 'step': 7524, 'epoch': 2} {'type': 'loss', 'content': 0.052063506096601486, 'timestamp': '2025-09-10 02:43:20.351778', 'step': 7525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:20.404151', 'step': 7525, 'epoch': 2} {'type': 'loss', 'content': 0.14811895787715912, 'timestamp': '2025-09-10 02:43:20.407803', 'step': 7526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:20.463621', 'step': 7526, 'epoch': 2} {'type': 'loss', 'content': 0.09574619680643082, 'timestamp': '2025-09-10 02:43:20.470432', 'step': 7527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:20.523393', 'step': 7527, 'epoch': 2} {'type': 'loss', 'content': 0.1271982043981552, 'timestamp': '2025-09-10 02:43:20.529185', 'step': 7528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:20.582658', 'step': 7528, 'epoch': 2} {'type': 'loss', 'content': 0.19885246455669403, 'timestamp': '2025-09-10 02:43:20.584675', 'step': 7529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:20.637362', 'step': 7529, 'epoch': 2} {'type': 'loss', 'content': 0.18877284228801727, 'timestamp': '2025-09-10 02:43:20.639349', 'step': 7530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:20.693305', 'step': 7530, 'epoch': 2} {'type': 'loss', 'content': 0.10846177488565445, 'timestamp': '2025-09-10 02:43:20.700766', 'step': 7531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:20.756432', 'step': 7531, 'epoch': 2} {'type': 'loss', 'content': 0.12832365930080414, 'timestamp': '2025-09-10 02:43:20.762217', 'step': 7532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:20.814613', 'step': 7532, 'epoch': 2} {'type': 'loss', 'content': 0.16148601472377777, 'timestamp': '2025-09-10 02:43:20.816592', 'step': 7533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:20.874357', 'step': 7533, 'epoch': 2} {'type': 'loss', 'content': 0.09180266410112381, 'timestamp': '2025-09-10 02:43:20.876543', 'step': 7534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:20.928820', 'step': 7534, 'epoch': 2} {'type': 'loss', 'content': 0.12577825784683228, 'timestamp': '2025-09-10 02:43:20.930835', 'step': 7535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:20.998230', 'step': 7535, 'epoch': 2} {'type': 'loss', 'content': 0.24715030193328857, 'timestamp': '2025-09-10 02:43:21.005023', 'step': 7536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:21.060949', 'step': 7536, 'epoch': 2} {'type': 'loss', 'content': 0.12738658487796783, 'timestamp': '2025-09-10 02:43:21.063096', 'step': 7537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:21.118230', 'step': 7537, 'epoch': 2} {'type': 'loss', 'content': 0.12519986927509308, 'timestamp': '2025-09-10 02:43:21.120335', 'step': 7538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:21.174843', 'step': 7538, 'epoch': 2} {'type': 'loss', 'content': 0.1556658148765564, 'timestamp': '2025-09-10 02:43:21.176864', 'step': 7539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:21.236196', 'step': 7539, 'epoch': 2} {'type': 'loss', 'content': 0.1520584374666214, 'timestamp': '2025-09-10 02:43:21.242012', 'step': 7540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:21.294377', 'step': 7540, 'epoch': 2} {'type': 'loss', 'content': 0.12272186577320099, 'timestamp': '2025-09-10 02:43:21.296359', 'step': 7541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:21.349926', 'step': 7541, 'epoch': 2} {'type': 'loss', 'content': 0.15532782673835754, 'timestamp': '2025-09-10 02:43:21.352948', 'step': 7542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:21.406691', 'step': 7542, 'epoch': 2} {'type': 'loss', 'content': 0.13858690857887268, 'timestamp': '2025-09-10 02:43:21.408670', 'step': 7543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:21.462325', 'step': 7543, 'epoch': 2} {'type': 'loss', 'content': 0.13657957315444946, 'timestamp': '2025-09-10 02:43:21.468116', 'step': 7544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:21.521133', 'step': 7544, 'epoch': 2} {'type': 'loss', 'content': 0.11277735978364944, 'timestamp': '2025-09-10 02:43:21.523205', 'step': 7545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:21.575938', 'step': 7545, 'epoch': 2} {'type': 'loss', 'content': 0.11463971436023712, 'timestamp': '2025-09-10 02:43:21.577924', 'step': 7546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:21.630895', 'step': 7546, 'epoch': 2} {'type': 'loss', 'content': 0.1873481720685959, 'timestamp': '2025-09-10 02:43:21.632911', 'step': 7547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:21.685430', 'step': 7547, 'epoch': 2} {'type': 'loss', 'content': 0.2266542911529541, 'timestamp': '2025-09-10 02:43:21.691192', 'step': 7548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:21.744082', 'step': 7548, 'epoch': 2} {'type': 'loss', 'content': 0.07987796515226364, 'timestamp': '2025-09-10 02:43:21.746044', 'step': 7549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:21.799398', 'step': 7549, 'epoch': 2} {'type': 'loss', 'content': 0.2605292499065399, 'timestamp': '2025-09-10 02:43:21.801350', 'step': 7550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:21.854240', 'step': 7550, 'epoch': 2} {'type': 'loss', 'content': 0.08987485617399216, 'timestamp': '2025-09-10 02:43:21.856211', 'step': 7551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:21.909196', 'step': 7551, 'epoch': 2} {'type': 'loss', 'content': 0.11789033561944962, 'timestamp': '2025-09-10 02:43:21.915095', 'step': 7552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:21.967700', 'step': 7552, 'epoch': 2} {'type': 'loss', 'content': 0.13933086395263672, 'timestamp': '2025-09-10 02:43:21.970035', 'step': 7553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:22.022921', 'step': 7553, 'epoch': 2} {'type': 'loss', 'content': 0.10551473498344421, 'timestamp': '2025-09-10 02:43:22.024998', 'step': 7554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:22.077707', 'step': 7554, 'epoch': 2} {'type': 'loss', 'content': 0.10072563588619232, 'timestamp': '2025-09-10 02:43:22.079784', 'step': 7555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:22.133450', 'step': 7555, 'epoch': 2} {'type': 'loss', 'content': 0.18208946287631989, 'timestamp': '2025-09-10 02:43:22.139215', 'step': 7556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:22.193337', 'step': 7556, 'epoch': 2} {'type': 'loss', 'content': 0.12373577803373337, 'timestamp': '2025-09-10 02:43:22.196822', 'step': 7557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:22.271672', 'step': 7557, 'epoch': 2} {'type': 'loss', 'content': 0.15911425650119781, 'timestamp': '2025-09-10 02:43:22.273904', 'step': 7558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:22.348906', 'step': 7558, 'epoch': 2} {'type': 'loss', 'content': 0.1252823770046234, 'timestamp': '2025-09-10 02:43:22.351298', 'step': 7559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:22.411232', 'step': 7559, 'epoch': 2} {'type': 'loss', 'content': 0.16319914162158966, 'timestamp': '2025-09-10 02:43:22.417222', 'step': 7560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:22.471647', 'step': 7560, 'epoch': 2} {'type': 'loss', 'content': 0.1834283024072647, 'timestamp': '2025-09-10 02:43:22.473595', 'step': 7561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:22.531425', 'step': 7561, 'epoch': 2} {'type': 'loss', 'content': 0.1343451589345932, 'timestamp': '2025-09-10 02:43:22.533406', 'step': 7562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:22.587238', 'step': 7562, 'epoch': 2} {'type': 'loss', 'content': 0.18964499235153198, 'timestamp': '2025-09-10 02:43:22.589345', 'step': 7563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:22.642409', 'step': 7563, 'epoch': 2} {'type': 'loss', 'content': 0.17939257621765137, 'timestamp': '2025-09-10 02:43:22.648147', 'step': 7564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:22.701117', 'step': 7564, 'epoch': 2} {'type': 'loss', 'content': 0.17548762261867523, 'timestamp': '2025-09-10 02:43:22.703180', 'step': 7565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:22.757393', 'step': 7565, 'epoch': 2} {'type': 'loss', 'content': 0.14037808775901794, 'timestamp': '2025-09-10 02:43:22.759643', 'step': 7566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:22.813478', 'step': 7566, 'epoch': 2} {'type': 'loss', 'content': 0.1621711701154709, 'timestamp': '2025-09-10 02:43:22.815580', 'step': 7567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:22.882391', 'step': 7567, 'epoch': 2} {'type': 'loss', 'content': 0.13377147912979126, 'timestamp': '2025-09-10 02:43:22.888300', 'step': 7568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:22.941622', 'step': 7568, 'epoch': 2} {'type': 'loss', 'content': 0.10641397535800934, 'timestamp': '2025-09-10 02:43:22.943631', 'step': 7569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:22.997967', 'step': 7569, 'epoch': 2} {'type': 'loss', 'content': 0.21044404804706573, 'timestamp': '2025-09-10 02:43:22.999908', 'step': 7570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:23.054576', 'step': 7570, 'epoch': 2} {'type': 'loss', 'content': 0.10976336896419525, 'timestamp': '2025-09-10 02:43:23.056696', 'step': 7571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:23.111742', 'step': 7571, 'epoch': 2} {'type': 'loss', 'content': 0.09404356777667999, 'timestamp': '2025-09-10 02:43:23.117470', 'step': 7572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:23.170776', 'step': 7572, 'epoch': 2} {'type': 'loss', 'content': 0.14625732600688934, 'timestamp': '2025-09-10 02:43:23.172707', 'step': 7573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:23.226010', 'step': 7573, 'epoch': 2} {'type': 'loss', 'content': 0.26474809646606445, 'timestamp': '2025-09-10 02:43:23.228165', 'step': 7574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:23.281796', 'step': 7574, 'epoch': 2} {'type': 'loss', 'content': 0.10204509645700455, 'timestamp': '2025-09-10 02:43:23.283857', 'step': 7575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:23.337691', 'step': 7575, 'epoch': 2} {'type': 'loss', 'content': 0.21254758536815643, 'timestamp': '2025-09-10 02:43:23.343526', 'step': 7576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:23.395762', 'step': 7576, 'epoch': 2} {'type': 'loss', 'content': 0.16881881654262543, 'timestamp': '2025-09-10 02:43:23.397797', 'step': 7577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:23.450206', 'step': 7577, 'epoch': 2} {'type': 'loss', 'content': 0.17894573509693146, 'timestamp': '2025-09-10 02:43:23.452211', 'step': 7578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:23.507163', 'step': 7578, 'epoch': 2} {'type': 'loss', 'content': 0.08356880396604538, 'timestamp': '2025-09-10 02:43:23.509342', 'step': 7579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:23.561678', 'step': 7579, 'epoch': 2} {'type': 'loss', 'content': 0.24867740273475647, 'timestamp': '2025-09-10 02:43:23.567509', 'step': 7580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:23.620596', 'step': 7580, 'epoch': 2} {'type': 'loss', 'content': 0.17955085635185242, 'timestamp': '2025-09-10 02:43:23.622783', 'step': 7581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:23.676035', 'step': 7581, 'epoch': 2} {'type': 'loss', 'content': 0.13996842503547668, 'timestamp': '2025-09-10 02:43:23.678315', 'step': 7582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:23.732100', 'step': 7582, 'epoch': 2} {'type': 'loss', 'content': 0.2423391342163086, 'timestamp': '2025-09-10 02:43:23.734078', 'step': 7583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:23.788548', 'step': 7583, 'epoch': 2} {'type': 'loss', 'content': 0.12141112983226776, 'timestamp': '2025-09-10 02:43:23.794494', 'step': 7584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:23.846640', 'step': 7584, 'epoch': 2} {'type': 'loss', 'content': 0.11828716844320297, 'timestamp': '2025-09-10 02:43:23.848596', 'step': 7585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:23.901246', 'step': 7585, 'epoch': 2} {'type': 'loss', 'content': 0.0887557715177536, 'timestamp': '2025-09-10 02:43:23.903233', 'step': 7586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:23.955823', 'step': 7586, 'epoch': 2} {'type': 'loss', 'content': 0.1454235464334488, 'timestamp': '2025-09-10 02:43:23.957879', 'step': 7587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:24.010631', 'step': 7587, 'epoch': 2} {'type': 'loss', 'content': 0.2900533974170685, 'timestamp': '2025-09-10 02:43:24.016417', 'step': 7588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:24.069276', 'step': 7588, 'epoch': 2} {'type': 'loss', 'content': 0.1718011349439621, 'timestamp': '2025-09-10 02:43:24.071350', 'step': 7589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:24.124336', 'step': 7589, 'epoch': 2} {'type': 'loss', 'content': 0.1596325784921646, 'timestamp': '2025-09-10 02:43:24.126323', 'step': 7590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:24.178680', 'step': 7590, 'epoch': 2} {'type': 'loss', 'content': 0.11343474686145782, 'timestamp': '2025-09-10 02:43:24.180638', 'step': 7591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:24.232917', 'step': 7591, 'epoch': 2} {'type': 'loss', 'content': 0.15405727922916412, 'timestamp': '2025-09-10 02:43:24.238672', 'step': 7592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:24.291123', 'step': 7592, 'epoch': 2} {'type': 'loss', 'content': 0.1963440477848053, 'timestamp': '2025-09-10 02:43:24.293131', 'step': 7593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:24.347615', 'step': 7593, 'epoch': 2} {'type': 'loss', 'content': 0.24207766354084015, 'timestamp': '2025-09-10 02:43:24.349629', 'step': 7594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:24.403265', 'step': 7594, 'epoch': 2} {'type': 'loss', 'content': 0.14828236401081085, 'timestamp': '2025-09-10 02:43:24.405444', 'step': 7595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:24.458338', 'step': 7595, 'epoch': 2} {'type': 'loss', 'content': 0.1293342262506485, 'timestamp': '2025-09-10 02:43:24.464337', 'step': 7596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:24.517704', 'step': 7596, 'epoch': 2} {'type': 'loss', 'content': 0.1736745834350586, 'timestamp': '2025-09-10 02:43:24.519899', 'step': 7597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:24.574744', 'step': 7597, 'epoch': 2} {'type': 'loss', 'content': 0.1435343325138092, 'timestamp': '2025-09-10 02:43:24.576903', 'step': 7598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:24.630579', 'step': 7598, 'epoch': 2} {'type': 'loss', 'content': 0.14588704705238342, 'timestamp': '2025-09-10 02:43:24.632523', 'step': 7599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:24.686442', 'step': 7599, 'epoch': 2} {'type': 'loss', 'content': 0.23355615139007568, 'timestamp': '2025-09-10 02:43:24.692258', 'step': 7600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:24.744454', 'step': 7600, 'epoch': 2} {'type': 'loss', 'content': 0.16611185669898987, 'timestamp': '2025-09-10 02:43:24.746517', 'step': 7601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:24.799666', 'step': 7601, 'epoch': 2} {'type': 'loss', 'content': 0.15611697733402252, 'timestamp': '2025-09-10 02:43:24.801666', 'step': 7602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:24.855619', 'step': 7602, 'epoch': 2} {'type': 'loss', 'content': 0.18012571334838867, 'timestamp': '2025-09-10 02:43:24.857770', 'step': 7603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:24.911886', 'step': 7603, 'epoch': 2} {'type': 'loss', 'content': 0.16144970059394836, 'timestamp': '2025-09-10 02:43:24.917864', 'step': 7604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:24.970839', 'step': 7604, 'epoch': 2} {'type': 'loss', 'content': 0.13235487043857574, 'timestamp': '2025-09-10 02:43:24.972775', 'step': 7605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:25.029497', 'step': 7605, 'epoch': 2} {'type': 'loss', 'content': 0.20482586324214935, 'timestamp': '2025-09-10 02:43:25.031434', 'step': 7606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:25.084450', 'step': 7606, 'epoch': 2} {'type': 'loss', 'content': 0.14439134299755096, 'timestamp': '2025-09-10 02:43:25.086497', 'step': 7607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:25.142137', 'step': 7607, 'epoch': 2} {'type': 'loss', 'content': 0.1430520862340927, 'timestamp': '2025-09-10 02:43:25.147871', 'step': 7608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:25.200402', 'step': 7608, 'epoch': 2} {'type': 'loss', 'content': 0.07988379895687103, 'timestamp': '2025-09-10 02:43:25.202342', 'step': 7609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:25.255657', 'step': 7609, 'epoch': 2} {'type': 'loss', 'content': 0.14435501396656036, 'timestamp': '2025-09-10 02:43:25.257699', 'step': 7610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:25.312683', 'step': 7610, 'epoch': 2} {'type': 'loss', 'content': 0.10568222403526306, 'timestamp': '2025-09-10 02:43:25.314688', 'step': 7611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:25.367717', 'step': 7611, 'epoch': 2} {'type': 'loss', 'content': 0.09713058918714523, 'timestamp': '2025-09-10 02:43:25.373659', 'step': 7612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:25.426826', 'step': 7612, 'epoch': 2} {'type': 'loss', 'content': 0.13453252613544464, 'timestamp': '2025-09-10 02:43:25.428789', 'step': 7613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:25.481246', 'step': 7613, 'epoch': 2} {'type': 'loss', 'content': 0.1809508204460144, 'timestamp': '2025-09-10 02:43:25.483148', 'step': 7614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:25.535690', 'step': 7614, 'epoch': 2} {'type': 'loss', 'content': 0.2850707769393921, 'timestamp': '2025-09-10 02:43:25.537626', 'step': 7615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:25.589933', 'step': 7615, 'epoch': 2} {'type': 'loss', 'content': 0.1816091239452362, 'timestamp': '2025-09-10 02:43:25.595785', 'step': 7616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:25.647632', 'step': 7616, 'epoch': 2} {'type': 'loss', 'content': 0.0904896929860115, 'timestamp': '2025-09-10 02:43:25.649555', 'step': 7617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:25.702781', 'step': 7617, 'epoch': 2} {'type': 'loss', 'content': 0.18403242528438568, 'timestamp': '2025-09-10 02:43:25.704742', 'step': 7618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:25.756811', 'step': 7618, 'epoch': 2} {'type': 'loss', 'content': 0.12791316211223602, 'timestamp': '2025-09-10 02:43:25.758680', 'step': 7619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:25.811528', 'step': 7619, 'epoch': 2} {'type': 'loss', 'content': 0.1474718600511551, 'timestamp': '2025-09-10 02:43:25.817208', 'step': 7620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:25.868991', 'step': 7620, 'epoch': 2} {'type': 'loss', 'content': 0.18796388804912567, 'timestamp': '2025-09-10 02:43:25.870986', 'step': 7621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:25.924214', 'step': 7621, 'epoch': 2} {'type': 'loss', 'content': 0.22457817196846008, 'timestamp': '2025-09-10 02:43:25.926308', 'step': 7622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:25.979839', 'step': 7622, 'epoch': 2} {'type': 'loss', 'content': 0.1268094778060913, 'timestamp': '2025-09-10 02:43:25.981800', 'step': 7623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:26.034330', 'step': 7623, 'epoch': 2} {'type': 'loss', 'content': 0.12167438119649887, 'timestamp': '2025-09-10 02:43:26.040568', 'step': 7624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:26.093688', 'step': 7624, 'epoch': 2} {'type': 'loss', 'content': 0.20126047730445862, 'timestamp': '2025-09-10 02:43:26.095933', 'step': 7625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:26.149118', 'step': 7625, 'epoch': 2} {'type': 'loss', 'content': 0.11652764678001404, 'timestamp': '2025-09-10 02:43:26.151067', 'step': 7626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:26.204343', 'step': 7626, 'epoch': 2} {'type': 'loss', 'content': 0.07527735084295273, 'timestamp': '2025-09-10 02:43:26.206487', 'step': 7627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:26.260635', 'step': 7627, 'epoch': 2} {'type': 'loss', 'content': 0.12481743842363358, 'timestamp': '2025-09-10 02:43:26.266572', 'step': 7628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:26.322605', 'step': 7628, 'epoch': 2} {'type': 'loss', 'content': 0.08612896502017975, 'timestamp': '2025-09-10 02:43:26.324886', 'step': 7629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:26.377885', 'step': 7629, 'epoch': 2} {'type': 'loss', 'content': 0.12734773755073547, 'timestamp': '2025-09-10 02:43:26.379794', 'step': 7630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:26.433789', 'step': 7630, 'epoch': 2} {'type': 'loss', 'content': 0.12059438228607178, 'timestamp': '2025-09-10 02:43:26.435732', 'step': 7631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:26.488935', 'step': 7631, 'epoch': 2} {'type': 'loss', 'content': 0.18618901073932648, 'timestamp': '2025-09-10 02:43:26.494718', 'step': 7632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:26.546737', 'step': 7632, 'epoch': 2} {'type': 'loss', 'content': 0.23234806954860687, 'timestamp': '2025-09-10 02:43:26.548714', 'step': 7633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:26.601879', 'step': 7633, 'epoch': 2} {'type': 'loss', 'content': 0.1744959056377411, 'timestamp': '2025-09-10 02:43:26.603948', 'step': 7634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:26.656913', 'step': 7634, 'epoch': 2} {'type': 'loss', 'content': 0.18699578940868378, 'timestamp': '2025-09-10 02:43:26.658883', 'step': 7635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:26.712100', 'step': 7635, 'epoch': 2} {'type': 'loss', 'content': 0.24351456761360168, 'timestamp': '2025-09-10 02:43:26.717835', 'step': 7636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:26.770047', 'step': 7636, 'epoch': 2} {'type': 'loss', 'content': 0.17283934354782104, 'timestamp': '2025-09-10 02:43:26.771995', 'step': 7637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:26.825307', 'step': 7637, 'epoch': 2} {'type': 'loss', 'content': 0.07703916728496552, 'timestamp': '2025-09-10 02:43:26.827450', 'step': 7638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:26.880450', 'step': 7638, 'epoch': 2} {'type': 'loss', 'content': 0.2255774289369583, 'timestamp': '2025-09-10 02:43:26.883945', 'step': 7639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:26.938525', 'step': 7639, 'epoch': 2} {'type': 'loss', 'content': 0.12791121006011963, 'timestamp': '2025-09-10 02:43:26.944326', 'step': 7640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:26.997747', 'step': 7640, 'epoch': 2} {'type': 'loss', 'content': 0.1504168063402176, 'timestamp': '2025-09-10 02:43:26.999768', 'step': 7641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:27.052606', 'step': 7641, 'epoch': 2} {'type': 'loss', 'content': 0.13255387544631958, 'timestamp': '2025-09-10 02:43:27.054523', 'step': 7642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:27.107063', 'step': 7642, 'epoch': 2} {'type': 'loss', 'content': 0.1922920197248459, 'timestamp': '2025-09-10 02:43:27.109111', 'step': 7643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:27.161419', 'step': 7643, 'epoch': 2} {'type': 'loss', 'content': 0.11517283320426941, 'timestamp': '2025-09-10 02:43:27.167468', 'step': 7644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:27.219706', 'step': 7644, 'epoch': 2} {'type': 'loss', 'content': 0.1631893366575241, 'timestamp': '2025-09-10 02:43:27.221670', 'step': 7645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:27.273488', 'step': 7645, 'epoch': 2} {'type': 'loss', 'content': 0.11266212165355682, 'timestamp': '2025-09-10 02:43:27.275465', 'step': 7646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:27.328142', 'step': 7646, 'epoch': 2} {'type': 'loss', 'content': 0.12261807173490524, 'timestamp': '2025-09-10 02:43:27.330087', 'step': 7647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:27.382295', 'step': 7647, 'epoch': 2} {'type': 'loss', 'content': 0.1619158238172531, 'timestamp': '2025-09-10 02:43:27.388027', 'step': 7648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:27.440560', 'step': 7648, 'epoch': 2} {'type': 'loss', 'content': 0.1823008507490158, 'timestamp': '2025-09-10 02:43:27.442455', 'step': 7649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:27.495389', 'step': 7649, 'epoch': 2} {'type': 'loss', 'content': 0.21116389334201813, 'timestamp': '2025-09-10 02:43:27.497389', 'step': 7650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:27.551504', 'step': 7650, 'epoch': 2} {'type': 'loss', 'content': 0.08367735892534256, 'timestamp': '2025-09-10 02:43:27.553204', 'step': 7651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:27.606695', 'step': 7651, 'epoch': 2} {'type': 'loss', 'content': 0.24123451113700867, 'timestamp': '2025-09-10 02:43:27.612557', 'step': 7652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:27.665930', 'step': 7652, 'epoch': 2} {'type': 'loss', 'content': 0.18220254778862, 'timestamp': '2025-09-10 02:43:27.668054', 'step': 7653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:27.720898', 'step': 7653, 'epoch': 2} {'type': 'loss', 'content': 0.1947084218263626, 'timestamp': '2025-09-10 02:43:27.723083', 'step': 7654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:27.776815', 'step': 7654, 'epoch': 2} {'type': 'loss', 'content': 0.17713633179664612, 'timestamp': '2025-09-10 02:43:27.778909', 'step': 7655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:27.832610', 'step': 7655, 'epoch': 2} {'type': 'loss', 'content': 0.13320580124855042, 'timestamp': '2025-09-10 02:43:27.838410', 'step': 7656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:27.890647', 'step': 7656, 'epoch': 2} {'type': 'loss', 'content': 0.16972915828227997, 'timestamp': '2025-09-10 02:43:27.892552', 'step': 7657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:27.945858', 'step': 7657, 'epoch': 2} {'type': 'loss', 'content': 0.1799769550561905, 'timestamp': '2025-09-10 02:43:27.947789', 'step': 7658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:28.000348', 'step': 7658, 'epoch': 2} {'type': 'loss', 'content': 0.07380852848291397, 'timestamp': '2025-09-10 02:43:28.002387', 'step': 7659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:28.057323', 'step': 7659, 'epoch': 2} {'type': 'loss', 'content': 0.13032643496990204, 'timestamp': '2025-09-10 02:43:28.063242', 'step': 7660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:28.116651', 'step': 7660, 'epoch': 2} {'type': 'loss', 'content': 0.2321731001138687, 'timestamp': '2025-09-10 02:43:28.118688', 'step': 7661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:28.171781', 'step': 7661, 'epoch': 2} {'type': 'loss', 'content': 0.10071560740470886, 'timestamp': '2025-09-10 02:43:28.174017', 'step': 7662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:28.228235', 'step': 7662, 'epoch': 2} {'type': 'loss', 'content': 0.12019255757331848, 'timestamp': '2025-09-10 02:43:28.230222', 'step': 7663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:28.282837', 'step': 7663, 'epoch': 2} {'type': 'loss', 'content': 0.1806209534406662, 'timestamp': '2025-09-10 02:43:28.288581', 'step': 7664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:28.340969', 'step': 7664, 'epoch': 2} {'type': 'loss', 'content': 0.22044479846954346, 'timestamp': '2025-09-10 02:43:28.342958', 'step': 7665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:28.395727', 'step': 7665, 'epoch': 2} {'type': 'loss', 'content': 0.10911910980939865, 'timestamp': '2025-09-10 02:43:28.397775', 'step': 7666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:28.451828', 'step': 7666, 'epoch': 2} {'type': 'loss', 'content': 0.1884971708059311, 'timestamp': '2025-09-10 02:43:28.453954', 'step': 7667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:28.507295', 'step': 7667, 'epoch': 2} {'type': 'loss', 'content': 0.16449862718582153, 'timestamp': '2025-09-10 02:43:28.513229', 'step': 7668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:28.565557', 'step': 7668, 'epoch': 2} {'type': 'loss', 'content': 0.16627603769302368, 'timestamp': '2025-09-10 02:43:28.567612', 'step': 7669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:28.621329', 'step': 7669, 'epoch': 2} {'type': 'loss', 'content': 0.29476526379585266, 'timestamp': '2025-09-10 02:43:28.623349', 'step': 7670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:28.676012', 'step': 7670, 'epoch': 2} {'type': 'loss', 'content': 0.13055288791656494, 'timestamp': '2025-09-10 02:43:28.677959', 'step': 7671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:28.730933', 'step': 7671, 'epoch': 2} {'type': 'loss', 'content': 0.16575796902179718, 'timestamp': '2025-09-10 02:43:28.736770', 'step': 7672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:28.789148', 'step': 7672, 'epoch': 2} {'type': 'loss', 'content': 0.135324165225029, 'timestamp': '2025-09-10 02:43:28.791128', 'step': 7673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:28.843783', 'step': 7673, 'epoch': 2} {'type': 'loss', 'content': 0.24592146277427673, 'timestamp': '2025-09-10 02:43:28.845821', 'step': 7674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:28.899026', 'step': 7674, 'epoch': 2} {'type': 'loss', 'content': 0.1360437273979187, 'timestamp': '2025-09-10 02:43:28.901000', 'step': 7675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:28.954644', 'step': 7675, 'epoch': 2} {'type': 'loss', 'content': 0.2287684679031372, 'timestamp': '2025-09-10 02:43:28.960651', 'step': 7676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:29.016175', 'step': 7676, 'epoch': 2} {'type': 'loss', 'content': 0.16350722312927246, 'timestamp': '2025-09-10 02:43:29.018101', 'step': 7677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:29.072794', 'step': 7677, 'epoch': 2} {'type': 'loss', 'content': 0.17253278195858002, 'timestamp': '2025-09-10 02:43:29.074784', 'step': 7678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:29.131183', 'step': 7678, 'epoch': 2} {'type': 'loss', 'content': 0.1845908910036087, 'timestamp': '2025-09-10 02:43:29.133298', 'step': 7679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:29.188201', 'step': 7679, 'epoch': 2} {'type': 'loss', 'content': 0.14926838874816895, 'timestamp': '2025-09-10 02:43:29.194235', 'step': 7680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:29.247781', 'step': 7680, 'epoch': 2} {'type': 'loss', 'content': 0.1844731718301773, 'timestamp': '2025-09-10 02:43:29.249670', 'step': 7681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:29.309782', 'step': 7681, 'epoch': 2} {'type': 'loss', 'content': 0.08846896886825562, 'timestamp': '2025-09-10 02:43:29.311883', 'step': 7682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:29.367426', 'step': 7682, 'epoch': 2} {'type': 'loss', 'content': 0.1946190893650055, 'timestamp': '2025-09-10 02:43:29.369792', 'step': 7683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:29.423813', 'step': 7683, 'epoch': 2} {'type': 'loss', 'content': 0.09098111093044281, 'timestamp': '2025-09-10 02:43:29.429781', 'step': 7684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:29.484226', 'step': 7684, 'epoch': 2} {'type': 'loss', 'content': 0.12373402714729309, 'timestamp': '2025-09-10 02:43:29.486414', 'step': 7685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:29.539866', 'step': 7685, 'epoch': 2} {'type': 'loss', 'content': 0.15987519919872284, 'timestamp': '2025-09-10 02:43:29.541684', 'step': 7686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:29.594340', 'step': 7686, 'epoch': 2} {'type': 'loss', 'content': 0.18600106239318848, 'timestamp': '2025-09-10 02:43:29.596349', 'step': 7687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:29.649722', 'step': 7687, 'epoch': 2} {'type': 'loss', 'content': 0.14414657652378082, 'timestamp': '2025-09-10 02:43:29.655864', 'step': 7688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:29.709386', 'step': 7688, 'epoch': 2} {'type': 'loss', 'content': 0.17627379298210144, 'timestamp': '2025-09-10 02:43:29.711405', 'step': 7689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:29.764820', 'step': 7689, 'epoch': 2} {'type': 'loss', 'content': 0.10670753568410873, 'timestamp': '2025-09-10 02:43:29.767003', 'step': 7690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:29.820449', 'step': 7690, 'epoch': 2} {'type': 'loss', 'content': 0.11246651411056519, 'timestamp': '2025-09-10 02:43:29.822408', 'step': 7691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:29.876691', 'step': 7691, 'epoch': 2} {'type': 'loss', 'content': 0.2312270849943161, 'timestamp': '2025-09-10 02:43:29.882659', 'step': 7692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:29.935414', 'step': 7692, 'epoch': 2} {'type': 'loss', 'content': 0.16788430511951447, 'timestamp': '2025-09-10 02:43:29.937334', 'step': 7693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:29.991274', 'step': 7693, 'epoch': 2} {'type': 'loss', 'content': 0.13251669704914093, 'timestamp': '2025-09-10 02:43:29.993329', 'step': 7694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:30.046159', 'step': 7694, 'epoch': 2} {'type': 'loss', 'content': 0.21809960901737213, 'timestamp': '2025-09-10 02:43:30.048105', 'step': 7695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:30.100737', 'step': 7695, 'epoch': 2} {'type': 'loss', 'content': 0.10329136252403259, 'timestamp': '2025-09-10 02:43:30.106646', 'step': 7696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:30.159649', 'step': 7696, 'epoch': 2} {'type': 'loss', 'content': 0.19029496610164642, 'timestamp': '2025-09-10 02:43:30.165464', 'step': 7697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:30.219556', 'step': 7697, 'epoch': 2} {'type': 'loss', 'content': 0.25195711851119995, 'timestamp': '2025-09-10 02:43:30.221488', 'step': 7698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:30.275339', 'step': 7698, 'epoch': 2} {'type': 'loss', 'content': 0.17351461946964264, 'timestamp': '2025-09-10 02:43:30.277290', 'step': 7699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:30.331007', 'step': 7699, 'epoch': 2} {'type': 'loss', 'content': 0.08681757003068924, 'timestamp': '2025-09-10 02:43:30.336807', 'step': 7700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:30.389331', 'step': 7700, 'epoch': 2} {'type': 'loss', 'content': 0.220508873462677, 'timestamp': '2025-09-10 02:43:30.391626', 'step': 7701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:30.444546', 'step': 7701, 'epoch': 2} {'type': 'loss', 'content': 0.1137116551399231, 'timestamp': '2025-09-10 02:43:30.446530', 'step': 7702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:30.499421', 'step': 7702, 'epoch': 2} {'type': 'loss', 'content': 0.31125038862228394, 'timestamp': '2025-09-10 02:43:30.501399', 'step': 7703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:30.554688', 'step': 7703, 'epoch': 2} {'type': 'loss', 'content': 0.17874307930469513, 'timestamp': '2025-09-10 02:43:30.560846', 'step': 7704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:30.613596', 'step': 7704, 'epoch': 2} {'type': 'loss', 'content': 0.20321720838546753, 'timestamp': '2025-09-10 02:43:30.617846', 'step': 7705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:30.671619', 'step': 7705, 'epoch': 2} {'type': 'loss', 'content': 0.23120473325252533, 'timestamp': '2025-09-10 02:43:30.673636', 'step': 7706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:30.729914', 'step': 7706, 'epoch': 2} {'type': 'loss', 'content': 0.08064346015453339, 'timestamp': '2025-09-10 02:43:30.734404', 'step': 7707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:30.799157', 'step': 7707, 'epoch': 2} {'type': 'loss', 'content': 0.10325810313224792, 'timestamp': '2025-09-10 02:43:30.804804', 'step': 7708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:30.858151', 'step': 7708, 'epoch': 2} {'type': 'loss', 'content': 0.1356067657470703, 'timestamp': '2025-09-10 02:43:30.860296', 'step': 7709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:30.919692', 'step': 7709, 'epoch': 2} {'type': 'loss', 'content': 0.15205654501914978, 'timestamp': '2025-09-10 02:43:30.921546', 'step': 7710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:30.977383', 'step': 7710, 'epoch': 2} {'type': 'loss', 'content': 0.11050912737846375, 'timestamp': '2025-09-10 02:43:30.979788', 'step': 7711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:31.037498', 'step': 7711, 'epoch': 2} {'type': 'loss', 'content': 0.16828030347824097, 'timestamp': '2025-09-10 02:43:31.044053', 'step': 7712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:31.106318', 'step': 7712, 'epoch': 2} {'type': 'loss', 'content': 0.20733873546123505, 'timestamp': '2025-09-10 02:43:31.108553', 'step': 7713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:31.162768', 'step': 7713, 'epoch': 2} {'type': 'loss', 'content': 0.09761592745780945, 'timestamp': '2025-09-10 02:43:31.166425', 'step': 7714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:31.222967', 'step': 7714, 'epoch': 2} {'type': 'loss', 'content': 0.10749802738428116, 'timestamp': '2025-09-10 02:43:31.225248', 'step': 7715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:31.290161', 'step': 7715, 'epoch': 2} {'type': 'loss', 'content': 0.08827786147594452, 'timestamp': '2025-09-10 02:43:31.297604', 'step': 7716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:31.359727', 'step': 7716, 'epoch': 2} {'type': 'loss', 'content': 0.19000892341136932, 'timestamp': '2025-09-10 02:43:31.361623', 'step': 7717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:31.416834', 'step': 7717, 'epoch': 2} {'type': 'loss', 'content': 0.12045363336801529, 'timestamp': '2025-09-10 02:43:31.420076', 'step': 7718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:31.477959', 'step': 7718, 'epoch': 2} {'type': 'loss', 'content': 0.10971520841121674, 'timestamp': '2025-09-10 02:43:31.480410', 'step': 7719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:31.536282', 'step': 7719, 'epoch': 2} {'type': 'loss', 'content': 0.2119690626859665, 'timestamp': '2025-09-10 02:43:31.542693', 'step': 7720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:31.601077', 'step': 7720, 'epoch': 2} {'type': 'loss', 'content': 0.1257937103509903, 'timestamp': '2025-09-10 02:43:31.603346', 'step': 7721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:31.661881', 'step': 7721, 'epoch': 2} {'type': 'loss', 'content': 0.0869964137673378, 'timestamp': '2025-09-10 02:43:31.664112', 'step': 7722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:31.725322', 'step': 7722, 'epoch': 2} {'type': 'loss', 'content': 0.1513976752758026, 'timestamp': '2025-09-10 02:43:31.727443', 'step': 7723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:31.782757', 'step': 7723, 'epoch': 2} {'type': 'loss', 'content': 0.17760981619358063, 'timestamp': '2025-09-10 02:43:31.789253', 'step': 7724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:31.846692', 'step': 7724, 'epoch': 2} {'type': 'loss', 'content': 0.15396711230278015, 'timestamp': '2025-09-10 02:43:31.849010', 'step': 7725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:31.903647', 'step': 7725, 'epoch': 2} {'type': 'loss', 'content': 0.09663638472557068, 'timestamp': '2025-09-10 02:43:31.905941', 'step': 7726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:31.960723', 'step': 7726, 'epoch': 2} {'type': 'loss', 'content': 0.09587053954601288, 'timestamp': '2025-09-10 02:43:31.962942', 'step': 7727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:32.017444', 'step': 7727, 'epoch': 2} {'type': 'loss', 'content': 0.21506968140602112, 'timestamp': '2025-09-10 02:43:32.023683', 'step': 7728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:32.077841', 'step': 7728, 'epoch': 2} {'type': 'loss', 'content': 0.13769903779029846, 'timestamp': '2025-09-10 02:43:32.080099', 'step': 7729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:32.134272', 'step': 7729, 'epoch': 2} {'type': 'loss', 'content': 0.09134705364704132, 'timestamp': '2025-09-10 02:43:32.136465', 'step': 7730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:32.191194', 'step': 7730, 'epoch': 2} {'type': 'loss', 'content': 0.22205308079719543, 'timestamp': '2025-09-10 02:43:32.193422', 'step': 7731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:32.247771', 'step': 7731, 'epoch': 2} {'type': 'loss', 'content': 0.1933499127626419, 'timestamp': '2025-09-10 02:43:32.253885', 'step': 7732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:32.307233', 'step': 7732, 'epoch': 2} {'type': 'loss', 'content': 0.08825797587633133, 'timestamp': '2025-09-10 02:43:32.309300', 'step': 7733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:32.363762', 'step': 7733, 'epoch': 2} {'type': 'loss', 'content': 0.1442277580499649, 'timestamp': '2025-09-10 02:43:32.365944', 'step': 7734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:32.420266', 'step': 7734, 'epoch': 2} {'type': 'loss', 'content': 0.12460270524024963, 'timestamp': '2025-09-10 02:43:32.422245', 'step': 7735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:32.476525', 'step': 7735, 'epoch': 2} {'type': 'loss', 'content': 0.07528237253427505, 'timestamp': '2025-09-10 02:43:32.482755', 'step': 7736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:32.536656', 'step': 7736, 'epoch': 2} {'type': 'loss', 'content': 0.1411186009645462, 'timestamp': '2025-09-10 02:43:32.538925', 'step': 7737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:32.593424', 'step': 7737, 'epoch': 2} {'type': 'loss', 'content': 0.15609022974967957, 'timestamp': '2025-09-10 02:43:32.595670', 'step': 7738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:32.652357', 'step': 7738, 'epoch': 2} {'type': 'loss', 'content': 0.15771353244781494, 'timestamp': '2025-09-10 02:43:32.654607', 'step': 7739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:32.709349', 'step': 7739, 'epoch': 2} {'type': 'loss', 'content': 0.21303199231624603, 'timestamp': '2025-09-10 02:43:32.715802', 'step': 7740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:32.769811', 'step': 7740, 'epoch': 2} {'type': 'loss', 'content': 0.06907237321138382, 'timestamp': '2025-09-10 02:43:32.772273', 'step': 7741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:32.826869', 'step': 7741, 'epoch': 2} {'type': 'loss', 'content': 0.12416689842939377, 'timestamp': '2025-09-10 02:43:32.828988', 'step': 7742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:32.883555', 'step': 7742, 'epoch': 2} {'type': 'loss', 'content': 0.10663239657878876, 'timestamp': '2025-09-10 02:43:32.885712', 'step': 7743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:32.939922', 'step': 7743, 'epoch': 2} {'type': 'loss', 'content': 0.07934150844812393, 'timestamp': '2025-09-10 02:43:32.946100', 'step': 7744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:32.998738', 'step': 7744, 'epoch': 2} {'type': 'loss', 'content': 0.15375541150569916, 'timestamp': '2025-09-10 02:43:33.001029', 'step': 7745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:33.054289', 'step': 7745, 'epoch': 2} {'type': 'loss', 'content': 0.21906188130378723, 'timestamp': '2025-09-10 02:43:33.056623', 'step': 7746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:33.112278', 'step': 7746, 'epoch': 2} {'type': 'loss', 'content': 0.23094674944877625, 'timestamp': '2025-09-10 02:43:33.114357', 'step': 7747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:33.169275', 'step': 7747, 'epoch': 2} {'type': 'loss', 'content': 0.29277303814888, 'timestamp': '2025-09-10 02:43:33.175667', 'step': 7748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:33.229245', 'step': 7748, 'epoch': 2} {'type': 'loss', 'content': 0.19802840054035187, 'timestamp': '2025-09-10 02:43:33.231448', 'step': 7749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:33.284926', 'step': 7749, 'epoch': 2} {'type': 'loss', 'content': 0.1408519744873047, 'timestamp': '2025-09-10 02:43:33.287177', 'step': 7750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:33.344835', 'step': 7750, 'epoch': 2} {'type': 'loss', 'content': 0.09708718210458755, 'timestamp': '2025-09-10 02:43:33.347014', 'step': 7751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:33.400622', 'step': 7751, 'epoch': 2} {'type': 'loss', 'content': 0.21594218909740448, 'timestamp': '2025-09-10 02:43:33.406814', 'step': 7752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:33.460201', 'step': 7752, 'epoch': 2} {'type': 'loss', 'content': 0.2503845989704132, 'timestamp': '2025-09-10 02:43:33.462342', 'step': 7753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:33.517795', 'step': 7753, 'epoch': 2} {'type': 'loss', 'content': 0.201088547706604, 'timestamp': '2025-09-10 02:43:33.520060', 'step': 7754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:33.574227', 'step': 7754, 'epoch': 2} {'type': 'loss', 'content': 0.1520695835351944, 'timestamp': '2025-09-10 02:43:33.576609', 'step': 7755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:33.633536', 'step': 7755, 'epoch': 2} {'type': 'loss', 'content': 0.09941941499710083, 'timestamp': '2025-09-10 02:43:33.639505', 'step': 7756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:33.694732', 'step': 7756, 'epoch': 2} {'type': 'loss', 'content': 0.1447664499282837, 'timestamp': '2025-09-10 02:43:33.696864', 'step': 7757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:33.751206', 'step': 7757, 'epoch': 2} {'type': 'loss', 'content': 0.12617309391498566, 'timestamp': '2025-09-10 02:43:33.753418', 'step': 7758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:33.807135', 'step': 7758, 'epoch': 2} {'type': 'loss', 'content': 0.15938550233840942, 'timestamp': '2025-09-10 02:43:33.809184', 'step': 7759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:33.863019', 'step': 7759, 'epoch': 2} {'type': 'loss', 'content': 0.15169434249401093, 'timestamp': '2025-09-10 02:43:33.869024', 'step': 7760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:33.923488', 'step': 7760, 'epoch': 2} {'type': 'loss', 'content': 0.20311975479125977, 'timestamp': '2025-09-10 02:43:33.925927', 'step': 7761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:33.979514', 'step': 7761, 'epoch': 2} {'type': 'loss', 'content': 0.18303921818733215, 'timestamp': '2025-09-10 02:43:33.982116', 'step': 7762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:34.036621', 'step': 7762, 'epoch': 2} {'type': 'loss', 'content': 0.13498243689537048, 'timestamp': '2025-09-10 02:43:34.038804', 'step': 7763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:34.093187', 'step': 7763, 'epoch': 2} {'type': 'loss', 'content': 0.13566553592681885, 'timestamp': '2025-09-10 02:43:34.099050', 'step': 7764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:43:34.166072', 'step': 7764, 'epoch': 2} {'type': 'loss', 'content': 0.15315480530261993, 'timestamp': '2025-09-10 02:43:34.179587', 'step': 7765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:34.235682', 'step': 7765, 'epoch': 2} {'type': 'loss', 'content': 0.21096661686897278, 'timestamp': '2025-09-10 02:43:34.237864', 'step': 7766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:34.291378', 'step': 7766, 'epoch': 2} {'type': 'loss', 'content': 0.1925603747367859, 'timestamp': '2025-09-10 02:43:34.293492', 'step': 7767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:34.347189', 'step': 7767, 'epoch': 2} {'type': 'loss', 'content': 0.10382292419672012, 'timestamp': '2025-09-10 02:43:34.353300', 'step': 7768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:34.406804', 'step': 7768, 'epoch': 2} {'type': 'loss', 'content': 0.13774529099464417, 'timestamp': '2025-09-10 02:43:34.409330', 'step': 7769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:34.462461', 'step': 7769, 'epoch': 2} {'type': 'loss', 'content': 0.17612510919570923, 'timestamp': '2025-09-10 02:43:34.464786', 'step': 7770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:34.519226', 'step': 7770, 'epoch': 2} {'type': 'loss', 'content': 0.1670878827571869, 'timestamp': '2025-09-10 02:43:34.521579', 'step': 7771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:34.575325', 'step': 7771, 'epoch': 2} {'type': 'loss', 'content': 0.10774710774421692, 'timestamp': '2025-09-10 02:43:34.582714', 'step': 7772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:34.637373', 'step': 7772, 'epoch': 2} {'type': 'loss', 'content': 0.06680858135223389, 'timestamp': '2025-09-10 02:43:34.639586', 'step': 7773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:34.694273', 'step': 7773, 'epoch': 2} {'type': 'loss', 'content': 0.143364816904068, 'timestamp': '2025-09-10 02:43:34.696515', 'step': 7774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:34.751372', 'step': 7774, 'epoch': 2} {'type': 'loss', 'content': 0.1314416229724884, 'timestamp': '2025-09-10 02:43:34.753620', 'step': 7775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:34.807713', 'step': 7775, 'epoch': 2} {'type': 'loss', 'content': 0.10703011602163315, 'timestamp': '2025-09-10 02:43:34.813963', 'step': 7776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:34.866890', 'step': 7776, 'epoch': 2} {'type': 'loss', 'content': 0.09979741275310516, 'timestamp': '2025-09-10 02:43:34.869113', 'step': 7777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:34.922167', 'step': 7777, 'epoch': 2} {'type': 'loss', 'content': 0.10023870319128036, 'timestamp': '2025-09-10 02:43:34.924458', 'step': 7778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:34.977884', 'step': 7778, 'epoch': 2} {'type': 'loss', 'content': 0.16774433851242065, 'timestamp': '2025-09-10 02:43:34.980061', 'step': 7779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:35.036518', 'step': 7779, 'epoch': 2} {'type': 'loss', 'content': 0.17016299068927765, 'timestamp': '2025-09-10 02:43:35.042505', 'step': 7780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:35.096885', 'step': 7780, 'epoch': 2} {'type': 'loss', 'content': 0.23205362260341644, 'timestamp': '2025-09-10 02:43:35.099042', 'step': 7781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:35.154149', 'step': 7781, 'epoch': 2} {'type': 'loss', 'content': 0.13335765898227692, 'timestamp': '2025-09-10 02:43:35.156370', 'step': 7782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:35.213217', 'step': 7782, 'epoch': 2} {'type': 'loss', 'content': 0.10526634007692337, 'timestamp': '2025-09-10 02:43:35.215320', 'step': 7783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:35.272129', 'step': 7783, 'epoch': 2} {'type': 'loss', 'content': 0.09866756945848465, 'timestamp': '2025-09-10 02:43:35.278395', 'step': 7784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:35.332120', 'step': 7784, 'epoch': 2} {'type': 'loss', 'content': 0.10624261200428009, 'timestamp': '2025-09-10 02:43:35.334279', 'step': 7785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:35.387791', 'step': 7785, 'epoch': 2} {'type': 'loss', 'content': 0.19142000377178192, 'timestamp': '2025-09-10 02:43:35.390068', 'step': 7786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:35.444172', 'step': 7786, 'epoch': 2} {'type': 'loss', 'content': 0.12342645227909088, 'timestamp': '2025-09-10 02:43:35.446352', 'step': 7787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:35.499242', 'step': 7787, 'epoch': 2} {'type': 'loss', 'content': 0.169989213347435, 'timestamp': '2025-09-10 02:43:35.505172', 'step': 7788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:35.556905', 'step': 7788, 'epoch': 2} {'type': 'loss', 'content': 0.17064900696277618, 'timestamp': '2025-09-10 02:43:35.559078', 'step': 7789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:35.612668', 'step': 7789, 'epoch': 2} {'type': 'loss', 'content': 0.08750464022159576, 'timestamp': '2025-09-10 02:43:35.614769', 'step': 7790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:35.669602', 'step': 7790, 'epoch': 2} {'type': 'loss', 'content': 0.16736945509910583, 'timestamp': '2025-09-10 02:43:35.671857', 'step': 7791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:35.724873', 'step': 7791, 'epoch': 2} {'type': 'loss', 'content': 0.1873701512813568, 'timestamp': '2025-09-10 02:43:35.730965', 'step': 7792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:35.783499', 'step': 7792, 'epoch': 2} {'type': 'loss', 'content': 0.10262613743543625, 'timestamp': '2025-09-10 02:43:35.785692', 'step': 7793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:35.839308', 'step': 7793, 'epoch': 2} {'type': 'loss', 'content': 0.1871108114719391, 'timestamp': '2025-09-10 02:43:35.841477', 'step': 7794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:35.895146', 'step': 7794, 'epoch': 2} {'type': 'loss', 'content': 0.08411472290754318, 'timestamp': '2025-09-10 02:43:35.897569', 'step': 7795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:35.953182', 'step': 7795, 'epoch': 2} {'type': 'loss', 'content': 0.1420988142490387, 'timestamp': '2025-09-10 02:43:35.959231', 'step': 7796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:36.012998', 'step': 7796, 'epoch': 2} {'type': 'loss', 'content': 0.1708691120147705, 'timestamp': '2025-09-10 02:43:36.014913', 'step': 7797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:36.071192', 'step': 7797, 'epoch': 2} {'type': 'loss', 'content': 0.1209525316953659, 'timestamp': '2025-09-10 02:43:36.073310', 'step': 7798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:36.128171', 'step': 7798, 'epoch': 2} {'type': 'loss', 'content': 0.1654829978942871, 'timestamp': '2025-09-10 02:43:36.130185', 'step': 7799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:36.185333', 'step': 7799, 'epoch': 2} {'type': 'loss', 'content': 0.14736156165599823, 'timestamp': '2025-09-10 02:43:36.191574', 'step': 7800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:36.245842', 'step': 7800, 'epoch': 2} {'type': 'loss', 'content': 0.13835053145885468, 'timestamp': '2025-09-10 02:43:36.248063', 'step': 7801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:36.304333', 'step': 7801, 'epoch': 2} {'type': 'loss', 'content': 0.1455911099910736, 'timestamp': '2025-09-10 02:43:36.306439', 'step': 7802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:36.361070', 'step': 7802, 'epoch': 2} {'type': 'loss', 'content': 0.14029906690120697, 'timestamp': '2025-09-10 02:43:36.363199', 'step': 7803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:36.418668', 'step': 7803, 'epoch': 2} {'type': 'loss', 'content': 0.1222747415304184, 'timestamp': '2025-09-10 02:43:36.424879', 'step': 7804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:36.479297', 'step': 7804, 'epoch': 2} {'type': 'loss', 'content': 0.20356374979019165, 'timestamp': '2025-09-10 02:43:36.481578', 'step': 7805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:36.535925', 'step': 7805, 'epoch': 2} {'type': 'loss', 'content': 0.2279062420129776, 'timestamp': '2025-09-10 02:43:36.538180', 'step': 7806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:36.592533', 'step': 7806, 'epoch': 2} {'type': 'loss', 'content': 0.2289513647556305, 'timestamp': '2025-09-10 02:43:36.594690', 'step': 7807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:36.649082', 'step': 7807, 'epoch': 2} {'type': 'loss', 'content': 0.10987217724323273, 'timestamp': '2025-09-10 02:43:36.655361', 'step': 7808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:36.716237', 'step': 7808, 'epoch': 2} {'type': 'loss', 'content': 0.1227143183350563, 'timestamp': '2025-09-10 02:43:36.718158', 'step': 7809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:36.774420', 'step': 7809, 'epoch': 2} {'type': 'loss', 'content': 0.0917583778500557, 'timestamp': '2025-09-10 02:43:36.776600', 'step': 7810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:36.832177', 'step': 7810, 'epoch': 2} {'type': 'loss', 'content': 0.1268308013677597, 'timestamp': '2025-09-10 02:43:36.834388', 'step': 7811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:36.888176', 'step': 7811, 'epoch': 2} {'type': 'loss', 'content': 0.1615382730960846, 'timestamp': '2025-09-10 02:43:36.894553', 'step': 7812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:36.948007', 'step': 7812, 'epoch': 2} {'type': 'loss', 'content': 0.08492143452167511, 'timestamp': '2025-09-10 02:43:36.950420', 'step': 7813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:37.005051', 'step': 7813, 'epoch': 2} {'type': 'loss', 'content': 0.09036826342344284, 'timestamp': '2025-09-10 02:43:37.007048', 'step': 7814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:37.063566', 'step': 7814, 'epoch': 2} {'type': 'loss', 'content': 0.06011252477765083, 'timestamp': '2025-09-10 02:43:37.065873', 'step': 7815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:37.120918', 'step': 7815, 'epoch': 2} {'type': 'loss', 'content': 0.2787840664386749, 'timestamp': '2025-09-10 02:43:37.127147', 'step': 7816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:37.184975', 'step': 7816, 'epoch': 2} {'type': 'loss', 'content': 0.14502008259296417, 'timestamp': '2025-09-10 02:43:37.187209', 'step': 7817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:37.243500', 'step': 7817, 'epoch': 2} {'type': 'loss', 'content': 0.05832226574420929, 'timestamp': '2025-09-10 02:43:37.248173', 'step': 7818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:37.303885', 'step': 7818, 'epoch': 2} {'type': 'loss', 'content': 0.15775355696678162, 'timestamp': '2025-09-10 02:43:37.306219', 'step': 7819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:37.361090', 'step': 7819, 'epoch': 2} {'type': 'loss', 'content': 0.16726802289485931, 'timestamp': '2025-09-10 02:43:37.367690', 'step': 7820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:37.422430', 'step': 7820, 'epoch': 2} {'type': 'loss', 'content': 0.09213877469301224, 'timestamp': '2025-09-10 02:43:37.424990', 'step': 7821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:37.479519', 'step': 7821, 'epoch': 2} {'type': 'loss', 'content': 0.13919910788536072, 'timestamp': '2025-09-10 02:43:37.481897', 'step': 7822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:37.536783', 'step': 7822, 'epoch': 2} {'type': 'loss', 'content': 0.07247327268123627, 'timestamp': '2025-09-10 02:43:37.541113', 'step': 7823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:37.597384', 'step': 7823, 'epoch': 2} {'type': 'loss', 'content': 0.10367459803819656, 'timestamp': '2025-09-10 02:43:37.603732', 'step': 7824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:37.669509', 'step': 7824, 'epoch': 2} {'type': 'loss', 'content': 0.2302704006433487, 'timestamp': '2025-09-10 02:43:37.672125', 'step': 7825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:37.729697', 'step': 7825, 'epoch': 2} {'type': 'loss', 'content': 0.137389674782753, 'timestamp': '2025-09-10 02:43:37.733783', 'step': 7826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:37.789183', 'step': 7826, 'epoch': 2} {'type': 'loss', 'content': 0.290956050157547, 'timestamp': '2025-09-10 02:43:37.791736', 'step': 7827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:37.846473', 'step': 7827, 'epoch': 2} {'type': 'loss', 'content': 0.10350489616394043, 'timestamp': '2025-09-10 02:43:37.853057', 'step': 7828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:37.907902', 'step': 7828, 'epoch': 2} {'type': 'loss', 'content': 0.17249515652656555, 'timestamp': '2025-09-10 02:43:37.910889', 'step': 7829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:37.965826', 'step': 7829, 'epoch': 2} {'type': 'loss', 'content': 0.07657133787870407, 'timestamp': '2025-09-10 02:43:37.967925', 'step': 7830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:38.023300', 'step': 7830, 'epoch': 2} {'type': 'loss', 'content': 0.17262189090251923, 'timestamp': '2025-09-10 02:43:38.025597', 'step': 7831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:38.080217', 'step': 7831, 'epoch': 2} {'type': 'loss', 'content': 0.22137828171253204, 'timestamp': '2025-09-10 02:43:38.086575', 'step': 7832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:38.140876', 'step': 7832, 'epoch': 2} {'type': 'loss', 'content': 0.11504419893026352, 'timestamp': '2025-09-10 02:43:38.143034', 'step': 7833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:38.197201', 'step': 7833, 'epoch': 2} {'type': 'loss', 'content': 0.14216911792755127, 'timestamp': '2025-09-10 02:43:38.199451', 'step': 7834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:38.253640', 'step': 7834, 'epoch': 2} {'type': 'loss', 'content': 0.1241292655467987, 'timestamp': '2025-09-10 02:43:38.255795', 'step': 7835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:38.310700', 'step': 7835, 'epoch': 2} {'type': 'loss', 'content': 0.09018965065479279, 'timestamp': '2025-09-10 02:43:38.317426', 'step': 7836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:38.370946', 'step': 7836, 'epoch': 2} {'type': 'loss', 'content': 0.1866607367992401, 'timestamp': '2025-09-10 02:43:38.373066', 'step': 7837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:38.428195', 'step': 7837, 'epoch': 2} {'type': 'loss', 'content': 0.10595712810754776, 'timestamp': '2025-09-10 02:43:38.430314', 'step': 7838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:38.484730', 'step': 7838, 'epoch': 2} {'type': 'loss', 'content': 0.1422957330942154, 'timestamp': '2025-09-10 02:43:38.486912', 'step': 7839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:38.540752', 'step': 7839, 'epoch': 2} {'type': 'loss', 'content': 0.2737729847431183, 'timestamp': '2025-09-10 02:43:38.547097', 'step': 7840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:38.600605', 'step': 7840, 'epoch': 2} {'type': 'loss', 'content': 0.16749325394630432, 'timestamp': '2025-09-10 02:43:38.602882', 'step': 7841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:38.657010', 'step': 7841, 'epoch': 2} {'type': 'loss', 'content': 0.27216312289237976, 'timestamp': '2025-09-10 02:43:38.659324', 'step': 7842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:38.712707', 'step': 7842, 'epoch': 2} {'type': 'loss', 'content': 0.08043445646762848, 'timestamp': '2025-09-10 02:43:38.714928', 'step': 7843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:38.768309', 'step': 7843, 'epoch': 2} {'type': 'loss', 'content': 0.13869385421276093, 'timestamp': '2025-09-10 02:43:38.774418', 'step': 7844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:38.827406', 'step': 7844, 'epoch': 2} {'type': 'loss', 'content': 0.14748170971870422, 'timestamp': '2025-09-10 02:43:38.829588', 'step': 7845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:38.885095', 'step': 7845, 'epoch': 2} {'type': 'loss', 'content': 0.09949756413698196, 'timestamp': '2025-09-10 02:43:38.887307', 'step': 7846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:38.941173', 'step': 7846, 'epoch': 2} {'type': 'loss', 'content': 0.05869435891509056, 'timestamp': '2025-09-10 02:43:38.943365', 'step': 7847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:38.997485', 'step': 7847, 'epoch': 2} {'type': 'loss', 'content': 0.2146814912557602, 'timestamp': '2025-09-10 02:43:39.003613', 'step': 7848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:39.066208', 'step': 7848, 'epoch': 2} {'type': 'loss', 'content': 0.1711171418428421, 'timestamp': '2025-09-10 02:43:39.068599', 'step': 7849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:39.122058', 'step': 7849, 'epoch': 2} {'type': 'loss', 'content': 0.12927168607711792, 'timestamp': '2025-09-10 02:43:39.124351', 'step': 7850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:39.177770', 'step': 7850, 'epoch': 2} {'type': 'loss', 'content': 0.15651103854179382, 'timestamp': '2025-09-10 02:43:39.179747', 'step': 7851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:39.233898', 'step': 7851, 'epoch': 2} {'type': 'loss', 'content': 0.17683342099189758, 'timestamp': '2025-09-10 02:43:39.239969', 'step': 7852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:39.294753', 'step': 7852, 'epoch': 2} {'type': 'loss', 'content': 0.12567265331745148, 'timestamp': '2025-09-10 02:43:39.297082', 'step': 7853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:39.352011', 'step': 7853, 'epoch': 2} {'type': 'loss', 'content': 0.1017274484038353, 'timestamp': '2025-09-10 02:43:39.354274', 'step': 7854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:39.408773', 'step': 7854, 'epoch': 2} {'type': 'loss', 'content': 0.21709409356117249, 'timestamp': '2025-09-10 02:43:39.411070', 'step': 7855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:39.466806', 'step': 7855, 'epoch': 2} {'type': 'loss', 'content': 0.09739785641431808, 'timestamp': '2025-09-10 02:43:39.473009', 'step': 7856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:39.526581', 'step': 7856, 'epoch': 2} {'type': 'loss', 'content': 0.09785135835409164, 'timestamp': '2025-09-10 02:43:39.528856', 'step': 7857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:39.583201', 'step': 7857, 'epoch': 2} {'type': 'loss', 'content': 0.2649449408054352, 'timestamp': '2025-09-10 02:43:39.585431', 'step': 7858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:39.640740', 'step': 7858, 'epoch': 2} {'type': 'loss', 'content': 0.1487559825181961, 'timestamp': '2025-09-10 02:43:39.643074', 'step': 7859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:39.699350', 'step': 7859, 'epoch': 2} {'type': 'loss', 'content': 0.12184131890535355, 'timestamp': '2025-09-10 02:43:39.705673', 'step': 7860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:39.759736', 'step': 7860, 'epoch': 2} {'type': 'loss', 'content': 0.19682933390140533, 'timestamp': '2025-09-10 02:43:39.762100', 'step': 7861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:39.816358', 'step': 7861, 'epoch': 2} {'type': 'loss', 'content': 0.136204794049263, 'timestamp': '2025-09-10 02:43:39.818346', 'step': 7862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:39.873372', 'step': 7862, 'epoch': 2} {'type': 'loss', 'content': 0.13705302774906158, 'timestamp': '2025-09-10 02:43:39.875762', 'step': 7863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:39.930509', 'step': 7863, 'epoch': 2} {'type': 'loss', 'content': 0.10214437544345856, 'timestamp': '2025-09-10 02:43:39.936854', 'step': 7864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:39.990240', 'step': 7864, 'epoch': 2} {'type': 'loss', 'content': 0.17300476133823395, 'timestamp': '2025-09-10 02:43:39.992456', 'step': 7865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:40.047639', 'step': 7865, 'epoch': 2} {'type': 'loss', 'content': 0.11459498852491379, 'timestamp': '2025-09-10 02:43:40.049829', 'step': 7866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:40.105080', 'step': 7866, 'epoch': 2} {'type': 'loss', 'content': 0.08091357350349426, 'timestamp': '2025-09-10 02:43:40.107319', 'step': 7867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:40.160861', 'step': 7867, 'epoch': 2} {'type': 'loss', 'content': 0.12487782537937164, 'timestamp': '2025-09-10 02:43:40.166850', 'step': 7868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:40.219688', 'step': 7868, 'epoch': 2} {'type': 'loss', 'content': 0.15079474449157715, 'timestamp': '2025-09-10 02:43:40.221836', 'step': 7869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:40.276479', 'step': 7869, 'epoch': 2} {'type': 'loss', 'content': 0.16283029317855835, 'timestamp': '2025-09-10 02:43:40.278748', 'step': 7870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:40.334208', 'step': 7870, 'epoch': 2} {'type': 'loss', 'content': 0.22282332181930542, 'timestamp': '2025-09-10 02:43:40.336595', 'step': 7871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:40.390844', 'step': 7871, 'epoch': 2} {'type': 'loss', 'content': 0.1459113359451294, 'timestamp': '2025-09-10 02:43:40.397092', 'step': 7872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:40.450434', 'step': 7872, 'epoch': 2} {'type': 'loss', 'content': 0.1337835043668747, 'timestamp': '2025-09-10 02:43:40.452554', 'step': 7873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:40.507441', 'step': 7873, 'epoch': 2} {'type': 'loss', 'content': 0.08884487301111221, 'timestamp': '2025-09-10 02:43:40.509636', 'step': 7874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:40.565544', 'step': 7874, 'epoch': 2} {'type': 'loss', 'content': 0.09968108683824539, 'timestamp': '2025-09-10 02:43:40.567685', 'step': 7875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:40.622508', 'step': 7875, 'epoch': 2} {'type': 'loss', 'content': 0.0836029127240181, 'timestamp': '2025-09-10 02:43:40.628902', 'step': 7876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:40.681945', 'step': 7876, 'epoch': 2} {'type': 'loss', 'content': 0.08863633871078491, 'timestamp': '2025-09-10 02:43:40.684134', 'step': 7877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:40.737429', 'step': 7877, 'epoch': 2} {'type': 'loss', 'content': 0.13036629557609558, 'timestamp': '2025-09-10 02:43:40.739596', 'step': 7878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:40.793313', 'step': 7878, 'epoch': 2} {'type': 'loss', 'content': 0.08719780296087265, 'timestamp': '2025-09-10 02:43:40.795483', 'step': 7879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:40.849328', 'step': 7879, 'epoch': 2} {'type': 'loss', 'content': 0.13130544126033783, 'timestamp': '2025-09-10 02:43:40.855432', 'step': 7880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:40.910157', 'step': 7880, 'epoch': 2} {'type': 'loss', 'content': 0.11181475967168808, 'timestamp': '2025-09-10 02:43:40.912237', 'step': 7881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:40.966224', 'step': 7881, 'epoch': 2} {'type': 'loss', 'content': 0.18723782896995544, 'timestamp': '2025-09-10 02:43:40.968429', 'step': 7882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:41.022297', 'step': 7882, 'epoch': 2} {'type': 'loss', 'content': 0.1272565722465515, 'timestamp': '2025-09-10 02:43:41.024479', 'step': 7883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:41.077342', 'step': 7883, 'epoch': 2} {'type': 'loss', 'content': 0.133719801902771, 'timestamp': '2025-09-10 02:43:41.083464', 'step': 7884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:41.137362', 'step': 7884, 'epoch': 2} {'type': 'loss', 'content': 0.10098925232887268, 'timestamp': '2025-09-10 02:43:41.139701', 'step': 7885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:41.193670', 'step': 7885, 'epoch': 2} {'type': 'loss', 'content': 0.16466988623142242, 'timestamp': '2025-09-10 02:43:41.195834', 'step': 7886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:41.250492', 'step': 7886, 'epoch': 2} {'type': 'loss', 'content': 0.24684414267539978, 'timestamp': '2025-09-10 02:43:41.253140', 'step': 7887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:41.307400', 'step': 7887, 'epoch': 2} {'type': 'loss', 'content': 0.209585502743721, 'timestamp': '2025-09-10 02:43:41.313664', 'step': 7888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:41.367170', 'step': 7888, 'epoch': 2} {'type': 'loss', 'content': 0.13436900079250336, 'timestamp': '2025-09-10 02:43:41.370077', 'step': 7889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:41.425739', 'step': 7889, 'epoch': 2} {'type': 'loss', 'content': 0.05508951097726822, 'timestamp': '2025-09-10 02:43:41.427912', 'step': 7890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:41.481280', 'step': 7890, 'epoch': 2} {'type': 'loss', 'content': 0.13497011363506317, 'timestamp': '2025-09-10 02:43:41.483440', 'step': 7891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:41.537204', 'step': 7891, 'epoch': 2} {'type': 'loss', 'content': 0.13054631650447845, 'timestamp': '2025-09-10 02:43:41.543085', 'step': 7892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:41.595629', 'step': 7892, 'epoch': 2} {'type': 'loss', 'content': 0.10326015949249268, 'timestamp': '2025-09-10 02:43:41.597744', 'step': 7893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:41.651544', 'step': 7893, 'epoch': 2} {'type': 'loss', 'content': 0.06783364713191986, 'timestamp': '2025-09-10 02:43:41.653811', 'step': 7894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:41.707701', 'step': 7894, 'epoch': 2} {'type': 'loss', 'content': 0.23414911329746246, 'timestamp': '2025-09-10 02:43:41.709829', 'step': 7895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:41.764511', 'step': 7895, 'epoch': 2} {'type': 'loss', 'content': 0.09077924489974976, 'timestamp': '2025-09-10 02:43:41.770811', 'step': 7896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:41.827741', 'step': 7896, 'epoch': 2} {'type': 'loss', 'content': 0.1951896846294403, 'timestamp': '2025-09-10 02:43:41.831486', 'step': 7897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:41.888713', 'step': 7897, 'epoch': 2} {'type': 'loss', 'content': 0.16657672822475433, 'timestamp': '2025-09-10 02:43:41.891069', 'step': 7898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:41.947957', 'step': 7898, 'epoch': 2} {'type': 'loss', 'content': 0.08697202801704407, 'timestamp': '2025-09-10 02:43:41.950281', 'step': 7899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:42.009139', 'step': 7899, 'epoch': 2} {'type': 'loss', 'content': 0.2129231095314026, 'timestamp': '2025-09-10 02:43:42.015762', 'step': 7900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:42.074702', 'step': 7900, 'epoch': 2} {'type': 'loss', 'content': 0.10951752215623856, 'timestamp': '2025-09-10 02:43:42.077016', 'step': 7901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:42.134356', 'step': 7901, 'epoch': 2} {'type': 'loss', 'content': 0.0910613015294075, 'timestamp': '2025-09-10 02:43:42.136589', 'step': 7902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:42.194002', 'step': 7902, 'epoch': 2} {'type': 'loss', 'content': 0.16188675165176392, 'timestamp': '2025-09-10 02:43:42.196469', 'step': 7903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:42.253826', 'step': 7903, 'epoch': 2} {'type': 'loss', 'content': 0.19573791325092316, 'timestamp': '2025-09-10 02:43:42.262589', 'step': 7904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:42.316511', 'step': 7904, 'epoch': 2} {'type': 'loss', 'content': 0.16498316824436188, 'timestamp': '2025-09-10 02:43:42.318761', 'step': 7905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:42.372983', 'step': 7905, 'epoch': 2} {'type': 'loss', 'content': 0.16419538855552673, 'timestamp': '2025-09-10 02:43:42.375711', 'step': 7906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:42.431662', 'step': 7906, 'epoch': 2} {'type': 'loss', 'content': 0.21137571334838867, 'timestamp': '2025-09-10 02:43:42.433609', 'step': 7907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:42.487995', 'step': 7907, 'epoch': 2} {'type': 'loss', 'content': 0.1371973156929016, 'timestamp': '2025-09-10 02:43:42.494213', 'step': 7908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:42.547620', 'step': 7908, 'epoch': 2} {'type': 'loss', 'content': 0.1521218866109848, 'timestamp': '2025-09-10 02:43:42.549710', 'step': 7909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:42.602664', 'step': 7909, 'epoch': 2} {'type': 'loss', 'content': 0.11819501966238022, 'timestamp': '2025-09-10 02:43:42.604792', 'step': 7910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:42.657978', 'step': 7910, 'epoch': 2} {'type': 'loss', 'content': 0.1426493227481842, 'timestamp': '2025-09-10 02:43:42.660456', 'step': 7911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:42.720766', 'step': 7911, 'epoch': 2} {'type': 'loss', 'content': 0.12277092784643173, 'timestamp': '2025-09-10 02:43:42.728175', 'step': 7912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:42.785852', 'step': 7912, 'epoch': 2} {'type': 'loss', 'content': 0.20682024955749512, 'timestamp': '2025-09-10 02:43:42.788323', 'step': 7913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:42.849286', 'step': 7913, 'epoch': 2} {'type': 'loss', 'content': 0.1755475103855133, 'timestamp': '2025-09-10 02:43:42.851737', 'step': 7914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:42.911911', 'step': 7914, 'epoch': 2} {'type': 'loss', 'content': 0.19265976548194885, 'timestamp': '2025-09-10 02:43:42.914176', 'step': 7915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:42.975309', 'step': 7915, 'epoch': 2} {'type': 'loss', 'content': 0.13467124104499817, 'timestamp': '2025-09-10 02:43:42.982471', 'step': 7916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:43.041393', 'step': 7916, 'epoch': 2} {'type': 'loss', 'content': 0.15294641256332397, 'timestamp': '2025-09-10 02:43:43.043551', 'step': 7917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:43.098209', 'step': 7917, 'epoch': 2} {'type': 'loss', 'content': 0.11041189730167389, 'timestamp': '2025-09-10 02:43:43.100334', 'step': 7918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:43.154192', 'step': 7918, 'epoch': 2} {'type': 'loss', 'content': 0.13733941316604614, 'timestamp': '2025-09-10 02:43:43.156375', 'step': 7919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:43.209918', 'step': 7919, 'epoch': 2} {'type': 'loss', 'content': 0.19361232221126556, 'timestamp': '2025-09-10 02:43:43.216061', 'step': 7920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:43.270816', 'step': 7920, 'epoch': 2} {'type': 'loss', 'content': 0.1365046501159668, 'timestamp': '2025-09-10 02:43:43.273116', 'step': 7921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:43.326879', 'step': 7921, 'epoch': 2} {'type': 'loss', 'content': 0.18066586554050446, 'timestamp': '2025-09-10 02:43:43.328955', 'step': 7922, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:43:55.920217', 'step': 7922, 'epoch': 2} {'type': 'pplx', 'content': 12914.038616490956, 'timestamp': '2025-09-10 02:43:55.923263', 'step': 7922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:55.977782', 'step': 7922, 'epoch': 2} {'type': 'loss', 'content': 0.10753806680440903, 'timestamp': '2025-09-10 02:43:55.980075', 'step': 7923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:56.034359', 'step': 7923, 'epoch': 2} {'type': 'loss', 'content': 0.17858707904815674, 'timestamp': '2025-09-10 02:43:56.040246', 'step': 7924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:56.094074', 'step': 7924, 'epoch': 2} {'type': 'loss', 'content': 0.10595495998859406, 'timestamp': '2025-09-10 02:43:56.096188', 'step': 7925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:56.149845', 'step': 7925, 'epoch': 2} {'type': 'loss', 'content': 0.12821829319000244, 'timestamp': '2025-09-10 02:43:56.151977', 'step': 7926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:56.205948', 'step': 7926, 'epoch': 2} {'type': 'loss', 'content': 0.10701503604650497, 'timestamp': '2025-09-10 02:43:56.208581', 'step': 7927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:56.261423', 'step': 7927, 'epoch': 2} {'type': 'loss', 'content': 0.14060458540916443, 'timestamp': '2025-09-10 02:43:56.267514', 'step': 7928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:56.320223', 'step': 7928, 'epoch': 2} {'type': 'loss', 'content': 0.17840051651000977, 'timestamp': '2025-09-10 02:43:56.322297', 'step': 7929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:56.374390', 'step': 7929, 'epoch': 2} {'type': 'loss', 'content': 0.06717949360609055, 'timestamp': '2025-09-10 02:43:56.376702', 'step': 7930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:56.429634', 'step': 7930, 'epoch': 2} {'type': 'loss', 'content': 0.23784668743610382, 'timestamp': '2025-09-10 02:43:56.432117', 'step': 7931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:56.487089', 'step': 7931, 'epoch': 2} {'type': 'loss', 'content': 0.14758454263210297, 'timestamp': '2025-09-10 02:43:56.492918', 'step': 7932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:56.554981', 'step': 7932, 'epoch': 2} {'type': 'loss', 'content': 0.11642293632030487, 'timestamp': '2025-09-10 02:43:56.556948', 'step': 7933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:56.613916', 'step': 7933, 'epoch': 2} {'type': 'loss', 'content': 0.067520372569561, 'timestamp': '2025-09-10 02:43:56.616107', 'step': 7934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:56.671216', 'step': 7934, 'epoch': 2} {'type': 'loss', 'content': 0.23276746273040771, 'timestamp': '2025-09-10 02:43:56.673325', 'step': 7935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:56.726406', 'step': 7935, 'epoch': 2} {'type': 'loss', 'content': 0.08676022291183472, 'timestamp': '2025-09-10 02:43:56.732670', 'step': 7936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:56.785827', 'step': 7936, 'epoch': 2} {'type': 'loss', 'content': 0.15506617724895477, 'timestamp': '2025-09-10 02:43:56.788036', 'step': 7937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:56.841351', 'step': 7937, 'epoch': 2} {'type': 'loss', 'content': 0.08924780040979385, 'timestamp': '2025-09-10 02:43:56.843527', 'step': 7938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:56.896987', 'step': 7938, 'epoch': 2} {'type': 'loss', 'content': 0.11892006546258926, 'timestamp': '2025-09-10 02:43:56.899106', 'step': 7939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:56.952818', 'step': 7939, 'epoch': 2} {'type': 'loss', 'content': 0.2785438597202301, 'timestamp': '2025-09-10 02:43:56.958356', 'step': 7940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:57.010352', 'step': 7940, 'epoch': 2} {'type': 'loss', 'content': 0.17257720232009888, 'timestamp': '2025-09-10 02:43:57.012527', 'step': 7941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:57.064522', 'step': 7941, 'epoch': 2} {'type': 'loss', 'content': 0.15655279159545898, 'timestamp': '2025-09-10 02:43:57.066571', 'step': 7942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:57.120943', 'step': 7942, 'epoch': 2} {'type': 'loss', 'content': 0.09857479482889175, 'timestamp': '2025-09-10 02:43:57.122911', 'step': 7943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:57.176270', 'step': 7943, 'epoch': 2} {'type': 'loss', 'content': 0.2061336189508438, 'timestamp': '2025-09-10 02:43:57.182104', 'step': 7944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:57.239400', 'step': 7944, 'epoch': 2} {'type': 'loss', 'content': 0.10573519021272659, 'timestamp': '2025-09-10 02:43:57.241453', 'step': 7945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:57.294397', 'step': 7945, 'epoch': 2} {'type': 'loss', 'content': 0.102004773914814, 'timestamp': '2025-09-10 02:43:57.296342', 'step': 7946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:57.349167', 'step': 7946, 'epoch': 2} {'type': 'loss', 'content': 0.05321522429585457, 'timestamp': '2025-09-10 02:43:57.351067', 'step': 7947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:57.404379', 'step': 7947, 'epoch': 2} {'type': 'loss', 'content': 0.18472830951213837, 'timestamp': '2025-09-10 02:43:57.410078', 'step': 7948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:57.462245', 'step': 7948, 'epoch': 2} {'type': 'loss', 'content': 0.1622268706560135, 'timestamp': '2025-09-10 02:43:57.464347', 'step': 7949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:57.516945', 'step': 7949, 'epoch': 2} {'type': 'loss', 'content': 0.16965560615062714, 'timestamp': '2025-09-10 02:43:57.518934', 'step': 7950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:57.572755', 'step': 7950, 'epoch': 2} {'type': 'loss', 'content': 0.18605950474739075, 'timestamp': '2025-09-10 02:43:57.575119', 'step': 7951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:57.629150', 'step': 7951, 'epoch': 2} {'type': 'loss', 'content': 0.13372009992599487, 'timestamp': '2025-09-10 02:43:57.635136', 'step': 7952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:57.687238', 'step': 7952, 'epoch': 2} {'type': 'loss', 'content': 0.1084105595946312, 'timestamp': '2025-09-10 02:43:57.689461', 'step': 7953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:57.742833', 'step': 7953, 'epoch': 2} {'type': 'loss', 'content': 0.1696164757013321, 'timestamp': '2025-09-10 02:43:57.744836', 'step': 7954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:57.797538', 'step': 7954, 'epoch': 2} {'type': 'loss', 'content': 0.17590661346912384, 'timestamp': '2025-09-10 02:43:57.799702', 'step': 7955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:57.852502', 'step': 7955, 'epoch': 2} {'type': 'loss', 'content': 0.20358119904994965, 'timestamp': '2025-09-10 02:43:57.858430', 'step': 7956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:57.911154', 'step': 7956, 'epoch': 2} {'type': 'loss', 'content': 0.15728013217449188, 'timestamp': '2025-09-10 02:43:57.913201', 'step': 7957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:57.965935', 'step': 7957, 'epoch': 2} {'type': 'loss', 'content': 0.14579296112060547, 'timestamp': '2025-09-10 02:43:57.967977', 'step': 7958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:58.020833', 'step': 7958, 'epoch': 2} {'type': 'loss', 'content': 0.08634781092405319, 'timestamp': '2025-09-10 02:43:58.022948', 'step': 7959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:58.075834', 'step': 7959, 'epoch': 2} {'type': 'loss', 'content': 0.07886873185634613, 'timestamp': '2025-09-10 02:43:58.081516', 'step': 7960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:58.135048', 'step': 7960, 'epoch': 2} {'type': 'loss', 'content': 0.0652722716331482, 'timestamp': '2025-09-10 02:43:58.136974', 'step': 7961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:58.190150', 'step': 7961, 'epoch': 2} {'type': 'loss', 'content': 0.16891972720623016, 'timestamp': '2025-09-10 02:43:58.192232', 'step': 7962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:58.246506', 'step': 7962, 'epoch': 2} {'type': 'loss', 'content': 0.11396041512489319, 'timestamp': '2025-09-10 02:43:58.248761', 'step': 7963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:58.302047', 'step': 7963, 'epoch': 2} {'type': 'loss', 'content': 0.08803070336580276, 'timestamp': '2025-09-10 02:43:58.307952', 'step': 7964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:58.367623', 'step': 7964, 'epoch': 2} {'type': 'loss', 'content': 0.11082354933023453, 'timestamp': '2025-09-10 02:43:58.370002', 'step': 7965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:58.422804', 'step': 7965, 'epoch': 2} {'type': 'loss', 'content': 0.1549081653356552, 'timestamp': '2025-09-10 02:43:58.425048', 'step': 7966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:58.479613', 'step': 7966, 'epoch': 2} {'type': 'loss', 'content': 0.23951435089111328, 'timestamp': '2025-09-10 02:43:58.481936', 'step': 7967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:58.534772', 'step': 7967, 'epoch': 2} {'type': 'loss', 'content': 0.09916616231203079, 'timestamp': '2025-09-10 02:43:58.540452', 'step': 7968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:58.593221', 'step': 7968, 'epoch': 2} {'type': 'loss', 'content': 0.1457882970571518, 'timestamp': '2025-09-10 02:43:58.595386', 'step': 7969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:58.648353', 'step': 7969, 'epoch': 2} {'type': 'loss', 'content': 0.09141744673252106, 'timestamp': '2025-09-10 02:43:58.650453', 'step': 7970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:58.703034', 'step': 7970, 'epoch': 2} {'type': 'loss', 'content': 0.06438836455345154, 'timestamp': '2025-09-10 02:43:58.705084', 'step': 7971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:58.758745', 'step': 7971, 'epoch': 2} {'type': 'loss', 'content': 0.09363032877445221, 'timestamp': '2025-09-10 02:43:58.764543', 'step': 7972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:58.817657', 'step': 7972, 'epoch': 2} {'type': 'loss', 'content': 0.2318078875541687, 'timestamp': '2025-09-10 02:43:58.819718', 'step': 7973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:58.872516', 'step': 7973, 'epoch': 2} {'type': 'loss', 'content': 0.1387587934732437, 'timestamp': '2025-09-10 02:43:58.874600', 'step': 7974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:58.930490', 'step': 7974, 'epoch': 2} {'type': 'loss', 'content': 0.13573090732097626, 'timestamp': '2025-09-10 02:43:58.932448', 'step': 7975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:58.984943', 'step': 7975, 'epoch': 2} {'type': 'loss', 'content': 0.14753951132297516, 'timestamp': '2025-09-10 02:43:58.990539', 'step': 7976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:59.044195', 'step': 7976, 'epoch': 2} {'type': 'loss', 'content': 0.1573403775691986, 'timestamp': '2025-09-10 02:43:59.046195', 'step': 7977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:59.099554', 'step': 7977, 'epoch': 2} {'type': 'loss', 'content': 0.09072991460561752, 'timestamp': '2025-09-10 02:43:59.101747', 'step': 7978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:59.157525', 'step': 7978, 'epoch': 2} {'type': 'loss', 'content': 0.170343279838562, 'timestamp': '2025-09-10 02:43:59.159503', 'step': 7979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:59.213355', 'step': 7979, 'epoch': 2} {'type': 'loss', 'content': 0.09616818279027939, 'timestamp': '2025-09-10 02:43:59.219390', 'step': 7980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:43:59.271929', 'step': 7980, 'epoch': 2} {'type': 'loss', 'content': 0.09063604474067688, 'timestamp': '2025-09-10 02:43:59.274272', 'step': 7981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:59.328949', 'step': 7981, 'epoch': 2} {'type': 'loss', 'content': 0.2947785258293152, 'timestamp': '2025-09-10 02:43:59.331073', 'step': 7982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:59.384083', 'step': 7982, 'epoch': 2} {'type': 'loss', 'content': 0.12530875205993652, 'timestamp': '2025-09-10 02:43:59.386160', 'step': 7983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:59.439332', 'step': 7983, 'epoch': 2} {'type': 'loss', 'content': 0.06513351202011108, 'timestamp': '2025-09-10 02:43:59.445095', 'step': 7984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:59.497600', 'step': 7984, 'epoch': 2} {'type': 'loss', 'content': 0.059920988976955414, 'timestamp': '2025-09-10 02:43:59.499891', 'step': 7985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:59.554871', 'step': 7985, 'epoch': 2} {'type': 'loss', 'content': 0.18258267641067505, 'timestamp': '2025-09-10 02:43:59.556794', 'step': 7986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:59.609803', 'step': 7986, 'epoch': 2} {'type': 'loss', 'content': 0.16298198699951172, 'timestamp': '2025-09-10 02:43:59.611838', 'step': 7987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:59.665965', 'step': 7987, 'epoch': 2} {'type': 'loss', 'content': 0.132804274559021, 'timestamp': '2025-09-10 02:43:59.671893', 'step': 7988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:59.724517', 'step': 7988, 'epoch': 2} {'type': 'loss', 'content': 0.09225795418024063, 'timestamp': '2025-09-10 02:43:59.726458', 'step': 7989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:43:59.779590', 'step': 7989, 'epoch': 2} {'type': 'loss', 'content': 0.15883980691432953, 'timestamp': '2025-09-10 02:43:59.781563', 'step': 7990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:59.834845', 'step': 7990, 'epoch': 2} {'type': 'loss', 'content': 0.11421213299036026, 'timestamp': '2025-09-10 02:43:59.836789', 'step': 7991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:43:59.890090', 'step': 7991, 'epoch': 2} {'type': 'loss', 'content': 0.07039176672697067, 'timestamp': '2025-09-10 02:43:59.895896', 'step': 7992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:59.949007', 'step': 7992, 'epoch': 2} {'type': 'loss', 'content': 0.1331150084733963, 'timestamp': '2025-09-10 02:43:59.951192', 'step': 7993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:00.004540', 'step': 7993, 'epoch': 2} {'type': 'loss', 'content': 0.1303645670413971, 'timestamp': '2025-09-10 02:44:00.006712', 'step': 7994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:00.061621', 'step': 7994, 'epoch': 2} {'type': 'loss', 'content': 0.2512194514274597, 'timestamp': '2025-09-10 02:44:00.063816', 'step': 7995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:00.116820', 'step': 7995, 'epoch': 2} {'type': 'loss', 'content': 0.17313748598098755, 'timestamp': '2025-09-10 02:44:00.122680', 'step': 7996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:00.174967', 'step': 7996, 'epoch': 2} {'type': 'loss', 'content': 0.09293460100889206, 'timestamp': '2025-09-10 02:44:00.177114', 'step': 7997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:00.229929', 'step': 7997, 'epoch': 2} {'type': 'loss', 'content': 0.09817145764827728, 'timestamp': '2025-09-10 02:44:00.231832', 'step': 7998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:00.284428', 'step': 7998, 'epoch': 2} {'type': 'loss', 'content': 0.2087731808423996, 'timestamp': '2025-09-10 02:44:00.286569', 'step': 7999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:00.339338', 'step': 7999, 'epoch': 2} {'type': 'loss', 'content': 0.17160211503505707, 'timestamp': '2025-09-10 02:44:00.345071', 'step': 8000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 8000', 'timestamp': '2025-09-10 02:44:00.744982', 'step': 8000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:00.806362', 'step': 8000, 'epoch': 2} {'type': 'loss', 'content': 0.119094617664814, 'timestamp': '2025-09-10 02:44:00.808607', 'step': 8001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:00.878681', 'step': 8001, 'epoch': 2} {'type': 'loss', 'content': 0.13157479465007782, 'timestamp': '2025-09-10 02:44:00.880662', 'step': 8002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:00.938622', 'step': 8002, 'epoch': 2} {'type': 'loss', 'content': 0.10096161812543869, 'timestamp': '2025-09-10 02:44:00.940806', 'step': 8003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:00.994348', 'step': 8003, 'epoch': 2} {'type': 'loss', 'content': 0.09563397616147995, 'timestamp': '2025-09-10 02:44:01.000395', 'step': 8004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:01.054182', 'step': 8004, 'epoch': 2} {'type': 'loss', 'content': 0.13503146171569824, 'timestamp': '2025-09-10 02:44:01.056223', 'step': 8005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:01.109227', 'step': 8005, 'epoch': 2} {'type': 'loss', 'content': 0.1575646698474884, 'timestamp': '2025-09-10 02:44:01.111232', 'step': 8006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:01.164250', 'step': 8006, 'epoch': 2} {'type': 'loss', 'content': 0.13881327211856842, 'timestamp': '2025-09-10 02:44:01.166456', 'step': 8007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:01.219340', 'step': 8007, 'epoch': 2} {'type': 'loss', 'content': 0.21150508522987366, 'timestamp': '2025-09-10 02:44:01.225436', 'step': 8008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:01.278195', 'step': 8008, 'epoch': 2} {'type': 'loss', 'content': 0.175096794962883, 'timestamp': '2025-09-10 02:44:01.280514', 'step': 8009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:01.334045', 'step': 8009, 'epoch': 2} {'type': 'loss', 'content': 0.09368714690208435, 'timestamp': '2025-09-10 02:44:01.336449', 'step': 8010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:01.390669', 'step': 8010, 'epoch': 2} {'type': 'loss', 'content': 0.11491017788648605, 'timestamp': '2025-09-10 02:44:01.392682', 'step': 8011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:01.446003', 'step': 8011, 'epoch': 2} {'type': 'loss', 'content': 0.1923549771308899, 'timestamp': '2025-09-10 02:44:01.452012', 'step': 8012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:01.505643', 'step': 8012, 'epoch': 2} {'type': 'loss', 'content': 0.13687804341316223, 'timestamp': '2025-09-10 02:44:01.507565', 'step': 8013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:01.560430', 'step': 8013, 'epoch': 2} {'type': 'loss', 'content': 0.16853949427604675, 'timestamp': '2025-09-10 02:44:01.562629', 'step': 8014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:01.616193', 'step': 8014, 'epoch': 2} {'type': 'loss', 'content': 0.12447475641965866, 'timestamp': '2025-09-10 02:44:01.618473', 'step': 8015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:01.671220', 'step': 8015, 'epoch': 2} {'type': 'loss', 'content': 0.10218679904937744, 'timestamp': '2025-09-10 02:44:01.677087', 'step': 8016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:01.730336', 'step': 8016, 'epoch': 2} {'type': 'loss', 'content': 0.0845487117767334, 'timestamp': '2025-09-10 02:44:01.732493', 'step': 8017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:01.785942', 'step': 8017, 'epoch': 2} {'type': 'loss', 'content': 0.07536200433969498, 'timestamp': '2025-09-10 02:44:01.788271', 'step': 8018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:01.841810', 'step': 8018, 'epoch': 2} {'type': 'loss', 'content': 0.10931381583213806, 'timestamp': '2025-09-10 02:44:01.843928', 'step': 8019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:01.897080', 'step': 8019, 'epoch': 2} {'type': 'loss', 'content': 0.138685941696167, 'timestamp': '2025-09-10 02:44:01.902837', 'step': 8020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:01.955484', 'step': 8020, 'epoch': 2} {'type': 'loss', 'content': 0.16094638407230377, 'timestamp': '2025-09-10 02:44:01.957647', 'step': 8021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:02.011498', 'step': 8021, 'epoch': 2} {'type': 'loss', 'content': 0.08513294160366058, 'timestamp': '2025-09-10 02:44:02.013624', 'step': 8022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:02.067228', 'step': 8022, 'epoch': 2} {'type': 'loss', 'content': 0.12680651247501373, 'timestamp': '2025-09-10 02:44:02.069481', 'step': 8023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:02.123283', 'step': 8023, 'epoch': 2} {'type': 'loss', 'content': 0.22552190721035004, 'timestamp': '2025-09-10 02:44:02.129341', 'step': 8024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:02.184739', 'step': 8024, 'epoch': 2} {'type': 'loss', 'content': 0.21279986202716827, 'timestamp': '2025-09-10 02:44:02.186894', 'step': 8025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:02.241445', 'step': 8025, 'epoch': 2} {'type': 'loss', 'content': 0.19012001156806946, 'timestamp': '2025-09-10 02:44:02.243628', 'step': 8026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:02.301289', 'step': 8026, 'epoch': 2} {'type': 'loss', 'content': 0.20964719355106354, 'timestamp': '2025-09-10 02:44:02.303590', 'step': 8027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:02.359447', 'step': 8027, 'epoch': 2} {'type': 'loss', 'content': 0.13361510634422302, 'timestamp': '2025-09-10 02:44:02.365619', 'step': 8028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:02.420857', 'step': 8028, 'epoch': 2} {'type': 'loss', 'content': 0.15401674807071686, 'timestamp': '2025-09-10 02:44:02.422987', 'step': 8029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:02.477572', 'step': 8029, 'epoch': 2} {'type': 'loss', 'content': 0.1407705694437027, 'timestamp': '2025-09-10 02:44:02.479783', 'step': 8030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:02.535370', 'step': 8030, 'epoch': 2} {'type': 'loss', 'content': 0.15449413657188416, 'timestamp': '2025-09-10 02:44:02.537595', 'step': 8031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:02.591503', 'step': 8031, 'epoch': 2} {'type': 'loss', 'content': 0.10152187198400497, 'timestamp': '2025-09-10 02:44:02.597600', 'step': 8032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:02.649866', 'step': 8032, 'epoch': 2} {'type': 'loss', 'content': 0.05630747601389885, 'timestamp': '2025-09-10 02:44:02.652061', 'step': 8033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:02.705508', 'step': 8033, 'epoch': 2} {'type': 'loss', 'content': 0.3044026494026184, 'timestamp': '2025-09-10 02:44:02.707635', 'step': 8034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:02.760715', 'step': 8034, 'epoch': 2} {'type': 'loss', 'content': 0.0563417449593544, 'timestamp': '2025-09-10 02:44:02.762842', 'step': 8035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:02.816063', 'step': 8035, 'epoch': 2} {'type': 'loss', 'content': 0.10594264417886734, 'timestamp': '2025-09-10 02:44:02.822103', 'step': 8036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:02.876624', 'step': 8036, 'epoch': 2} {'type': 'loss', 'content': 0.054554425179958344, 'timestamp': '2025-09-10 02:44:02.878875', 'step': 8037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:02.932346', 'step': 8037, 'epoch': 2} {'type': 'loss', 'content': 0.21211381256580353, 'timestamp': '2025-09-10 02:44:02.934667', 'step': 8038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:02.988128', 'step': 8038, 'epoch': 2} {'type': 'loss', 'content': 0.19536364078521729, 'timestamp': '2025-09-10 02:44:02.990369', 'step': 8039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:03.044441', 'step': 8039, 'epoch': 2} {'type': 'loss', 'content': 0.16190998256206512, 'timestamp': '2025-09-10 02:44:03.050626', 'step': 8040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:03.104021', 'step': 8040, 'epoch': 2} {'type': 'loss', 'content': 0.10838285088539124, 'timestamp': '2025-09-10 02:44:03.106195', 'step': 8041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:03.159535', 'step': 8041, 'epoch': 2} {'type': 'loss', 'content': 0.0917181596159935, 'timestamp': '2025-09-10 02:44:03.161710', 'step': 8042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:03.215569', 'step': 8042, 'epoch': 2} {'type': 'loss', 'content': 0.10538774728775024, 'timestamp': '2025-09-10 02:44:03.217717', 'step': 8043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:03.273253', 'step': 8043, 'epoch': 2} {'type': 'loss', 'content': 0.18226337432861328, 'timestamp': '2025-09-10 02:44:03.279255', 'step': 8044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:03.332423', 'step': 8044, 'epoch': 2} {'type': 'loss', 'content': 0.14300072193145752, 'timestamp': '2025-09-10 02:44:03.334506', 'step': 8045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:03.387159', 'step': 8045, 'epoch': 2} {'type': 'loss', 'content': 0.10516573488712311, 'timestamp': '2025-09-10 02:44:03.389183', 'step': 8046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:03.443007', 'step': 8046, 'epoch': 2} {'type': 'loss', 'content': 0.10238324850797653, 'timestamp': '2025-09-10 02:44:03.445190', 'step': 8047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:03.498371', 'step': 8047, 'epoch': 2} {'type': 'loss', 'content': 0.13376909494400024, 'timestamp': '2025-09-10 02:44:03.504301', 'step': 8048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:03.556783', 'step': 8048, 'epoch': 2} {'type': 'loss', 'content': 0.21460604667663574, 'timestamp': '2025-09-10 02:44:03.558776', 'step': 8049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:03.612156', 'step': 8049, 'epoch': 2} {'type': 'loss', 'content': 0.13069897890090942, 'timestamp': '2025-09-10 02:44:03.614330', 'step': 8050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:03.667685', 'step': 8050, 'epoch': 2} {'type': 'loss', 'content': 0.17841890454292297, 'timestamp': '2025-09-10 02:44:03.669826', 'step': 8051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:03.723520', 'step': 8051, 'epoch': 2} {'type': 'loss', 'content': 0.16068153083324432, 'timestamp': '2025-09-10 02:44:03.729565', 'step': 8052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:03.782334', 'step': 8052, 'epoch': 2} {'type': 'loss', 'content': 0.19461689889431, 'timestamp': '2025-09-10 02:44:03.784680', 'step': 8053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:03.837392', 'step': 8053, 'epoch': 2} {'type': 'loss', 'content': 0.1159321516752243, 'timestamp': '2025-09-10 02:44:03.839612', 'step': 8054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:03.892341', 'step': 8054, 'epoch': 2} {'type': 'loss', 'content': 0.1467115581035614, 'timestamp': '2025-09-10 02:44:03.894523', 'step': 8055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:03.947851', 'step': 8055, 'epoch': 2} {'type': 'loss', 'content': 0.2744186520576477, 'timestamp': '2025-09-10 02:44:03.954162', 'step': 8056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:04.009616', 'step': 8056, 'epoch': 2} {'type': 'loss', 'content': 0.14274773001670837, 'timestamp': '2025-09-10 02:44:04.011988', 'step': 8057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:04.066644', 'step': 8057, 'epoch': 2} {'type': 'loss', 'content': 0.15203900635242462, 'timestamp': '2025-09-10 02:44:04.068989', 'step': 8058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:04.124553', 'step': 8058, 'epoch': 2} {'type': 'loss', 'content': 0.14864695072174072, 'timestamp': '2025-09-10 02:44:04.126540', 'step': 8059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:04.182954', 'step': 8059, 'epoch': 2} {'type': 'loss', 'content': 0.16148318350315094, 'timestamp': '2025-09-10 02:44:04.189178', 'step': 8060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:04.243369', 'step': 8060, 'epoch': 2} {'type': 'loss', 'content': 0.09262657165527344, 'timestamp': '2025-09-10 02:44:04.245587', 'step': 8061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:04.300784', 'step': 8061, 'epoch': 2} {'type': 'loss', 'content': 0.23522941768169403, 'timestamp': '2025-09-10 02:44:04.303112', 'step': 8062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:04.356698', 'step': 8062, 'epoch': 2} {'type': 'loss', 'content': 0.17914624512195587, 'timestamp': '2025-09-10 02:44:04.359005', 'step': 8063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:04.414413', 'step': 8063, 'epoch': 2} {'type': 'loss', 'content': 0.16180433332920074, 'timestamp': '2025-09-10 02:44:04.420481', 'step': 8064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:04.475016', 'step': 8064, 'epoch': 2} {'type': 'loss', 'content': 0.1238822415471077, 'timestamp': '2025-09-10 02:44:04.477173', 'step': 8065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:04.530736', 'step': 8065, 'epoch': 2} {'type': 'loss', 'content': 0.09168219566345215, 'timestamp': '2025-09-10 02:44:04.533065', 'step': 8066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:04.587147', 'step': 8066, 'epoch': 2} {'type': 'loss', 'content': 0.07672452926635742, 'timestamp': '2025-09-10 02:44:04.589507', 'step': 8067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:04.643275', 'step': 8067, 'epoch': 2} {'type': 'loss', 'content': 0.15017157793045044, 'timestamp': '2025-09-10 02:44:04.649410', 'step': 8068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:04.702676', 'step': 8068, 'epoch': 2} {'type': 'loss', 'content': 0.06042637303471565, 'timestamp': '2025-09-10 02:44:04.704829', 'step': 8069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:04.758762', 'step': 8069, 'epoch': 2} {'type': 'loss', 'content': 0.16564418375492096, 'timestamp': '2025-09-10 02:44:04.760919', 'step': 8070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:04.814758', 'step': 8070, 'epoch': 2} {'type': 'loss', 'content': 0.1814688891172409, 'timestamp': '2025-09-10 02:44:04.816916', 'step': 8071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:04.870317', 'step': 8071, 'epoch': 2} {'type': 'loss', 'content': 0.18084502220153809, 'timestamp': '2025-09-10 02:44:04.876279', 'step': 8072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:04.929617', 'step': 8072, 'epoch': 2} {'type': 'loss', 'content': 0.11224578320980072, 'timestamp': '2025-09-10 02:44:04.931699', 'step': 8073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:04.984742', 'step': 8073, 'epoch': 2} {'type': 'loss', 'content': 0.09928970038890839, 'timestamp': '2025-09-10 02:44:04.986959', 'step': 8074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:05.041084', 'step': 8074, 'epoch': 2} {'type': 'loss', 'content': 0.2221977859735489, 'timestamp': '2025-09-10 02:44:05.043309', 'step': 8075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:05.096822', 'step': 8075, 'epoch': 2} {'type': 'loss', 'content': 0.13342948257923126, 'timestamp': '2025-09-10 02:44:05.102869', 'step': 8076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:05.156523', 'step': 8076, 'epoch': 2} {'type': 'loss', 'content': 0.17690545320510864, 'timestamp': '2025-09-10 02:44:05.158691', 'step': 8077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:05.213230', 'step': 8077, 'epoch': 2} {'type': 'loss', 'content': 0.19763284921646118, 'timestamp': '2025-09-10 02:44:05.215293', 'step': 8078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:05.269344', 'step': 8078, 'epoch': 2} {'type': 'loss', 'content': 0.15497592091560364, 'timestamp': '2025-09-10 02:44:05.271464', 'step': 8079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:05.326512', 'step': 8079, 'epoch': 2} {'type': 'loss', 'content': 0.1896963268518448, 'timestamp': '2025-09-10 02:44:05.332490', 'step': 8080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:05.386068', 'step': 8080, 'epoch': 2} {'type': 'loss', 'content': 0.16657233238220215, 'timestamp': '2025-09-10 02:44:05.388093', 'step': 8081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:05.442049', 'step': 8081, 'epoch': 2} {'type': 'loss', 'content': 0.17636287212371826, 'timestamp': '2025-09-10 02:44:05.444345', 'step': 8082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:05.500058', 'step': 8082, 'epoch': 2} {'type': 'loss', 'content': 0.17305375635623932, 'timestamp': '2025-09-10 02:44:05.502129', 'step': 8083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:05.557891', 'step': 8083, 'epoch': 2} {'type': 'loss', 'content': 0.1510648876428604, 'timestamp': '2025-09-10 02:44:05.564351', 'step': 8084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:05.619849', 'step': 8084, 'epoch': 2} {'type': 'loss', 'content': 0.10779617726802826, 'timestamp': '2025-09-10 02:44:05.621983', 'step': 8085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:05.677262', 'step': 8085, 'epoch': 2} {'type': 'loss', 'content': 0.19692830741405487, 'timestamp': '2025-09-10 02:44:05.679412', 'step': 8086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:05.735515', 'step': 8086, 'epoch': 2} {'type': 'loss', 'content': 0.25616690516471863, 'timestamp': '2025-09-10 02:44:05.737652', 'step': 8087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:05.791625', 'step': 8087, 'epoch': 2} {'type': 'loss', 'content': 0.13932745158672333, 'timestamp': '2025-09-10 02:44:05.797677', 'step': 8088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:05.850780', 'step': 8088, 'epoch': 2} {'type': 'loss', 'content': 0.19125616550445557, 'timestamp': '2025-09-10 02:44:05.853041', 'step': 8089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:05.907471', 'step': 8089, 'epoch': 2} {'type': 'loss', 'content': 0.08123690634965897, 'timestamp': '2025-09-10 02:44:05.909785', 'step': 8090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:05.964001', 'step': 8090, 'epoch': 2} {'type': 'loss', 'content': 0.11666398495435715, 'timestamp': '2025-09-10 02:44:05.966106', 'step': 8091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:06.020077', 'step': 8091, 'epoch': 2} {'type': 'loss', 'content': 0.1410120725631714, 'timestamp': '2025-09-10 02:44:06.026220', 'step': 8092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:06.079331', 'step': 8092, 'epoch': 2} {'type': 'loss', 'content': 0.17951829731464386, 'timestamp': '2025-09-10 02:44:06.081453', 'step': 8093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:06.135931', 'step': 8093, 'epoch': 2} {'type': 'loss', 'content': 0.17823678255081177, 'timestamp': '2025-09-10 02:44:06.138012', 'step': 8094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:06.191174', 'step': 8094, 'epoch': 2} {'type': 'loss', 'content': 0.12126587331295013, 'timestamp': '2025-09-10 02:44:06.193264', 'step': 8095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:06.246450', 'step': 8095, 'epoch': 2} {'type': 'loss', 'content': 0.15721014142036438, 'timestamp': '2025-09-10 02:44:06.252673', 'step': 8096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:06.305276', 'step': 8096, 'epoch': 2} {'type': 'loss', 'content': 0.10135518014431, 'timestamp': '2025-09-10 02:44:06.307357', 'step': 8097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:06.360212', 'step': 8097, 'epoch': 2} {'type': 'loss', 'content': 0.1548895686864853, 'timestamp': '2025-09-10 02:44:06.362405', 'step': 8098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:06.416366', 'step': 8098, 'epoch': 2} {'type': 'loss', 'content': 0.16063933074474335, 'timestamp': '2025-09-10 02:44:06.418531', 'step': 8099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:06.471723', 'step': 8099, 'epoch': 2} {'type': 'loss', 'content': 0.09422815591096878, 'timestamp': '2025-09-10 02:44:06.477886', 'step': 8100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:06.530674', 'step': 8100, 'epoch': 2} {'type': 'loss', 'content': 0.13411806523799896, 'timestamp': '2025-09-10 02:44:06.532816', 'step': 8101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:06.586416', 'step': 8101, 'epoch': 2} {'type': 'loss', 'content': 0.07740048319101334, 'timestamp': '2025-09-10 02:44:06.588545', 'step': 8102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:06.642736', 'step': 8102, 'epoch': 2} {'type': 'loss', 'content': 0.1824309378862381, 'timestamp': '2025-09-10 02:44:06.644911', 'step': 8103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:06.699676', 'step': 8103, 'epoch': 2} {'type': 'loss', 'content': 0.14028319716453552, 'timestamp': '2025-09-10 02:44:06.706017', 'step': 8104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:06.759026', 'step': 8104, 'epoch': 2} {'type': 'loss', 'content': 0.16863417625427246, 'timestamp': '2025-09-10 02:44:06.761190', 'step': 8105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:06.814470', 'step': 8105, 'epoch': 2} {'type': 'loss', 'content': 0.06905226409435272, 'timestamp': '2025-09-10 02:44:06.816617', 'step': 8106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:06.870358', 'step': 8106, 'epoch': 2} {'type': 'loss', 'content': 0.1997935026884079, 'timestamp': '2025-09-10 02:44:06.872520', 'step': 8107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:06.926314', 'step': 8107, 'epoch': 2} {'type': 'loss', 'content': 0.2034147083759308, 'timestamp': '2025-09-10 02:44:06.932929', 'step': 8108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:06.986985', 'step': 8108, 'epoch': 2} {'type': 'loss', 'content': 0.17895296216011047, 'timestamp': '2025-09-10 02:44:06.989460', 'step': 8109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:07.043012', 'step': 8109, 'epoch': 2} {'type': 'loss', 'content': 0.11068714410066605, 'timestamp': '2025-09-10 02:44:07.045285', 'step': 8110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:07.100581', 'step': 8110, 'epoch': 2} {'type': 'loss', 'content': 0.23720765113830566, 'timestamp': '2025-09-10 02:44:07.102869', 'step': 8111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:07.157625', 'step': 8111, 'epoch': 2} {'type': 'loss', 'content': 0.05862757936120033, 'timestamp': '2025-09-10 02:44:07.163746', 'step': 8112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:07.216503', 'step': 8112, 'epoch': 2} {'type': 'loss', 'content': 0.1600891649723053, 'timestamp': '2025-09-10 02:44:07.218664', 'step': 8113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:07.272439', 'step': 8113, 'epoch': 2} {'type': 'loss', 'content': 0.14217792451381683, 'timestamp': '2025-09-10 02:44:07.274590', 'step': 8114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:07.328975', 'step': 8114, 'epoch': 2} {'type': 'loss', 'content': 0.1936364471912384, 'timestamp': '2025-09-10 02:44:07.331180', 'step': 8115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:07.385205', 'step': 8115, 'epoch': 2} {'type': 'loss', 'content': 0.09155894070863724, 'timestamp': '2025-09-10 02:44:07.391354', 'step': 8116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:07.445013', 'step': 8116, 'epoch': 2} {'type': 'loss', 'content': 0.10571174323558807, 'timestamp': '2025-09-10 02:44:07.447294', 'step': 8117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:07.501517', 'step': 8117, 'epoch': 2} {'type': 'loss', 'content': 0.10133213549852371, 'timestamp': '2025-09-10 02:44:07.503662', 'step': 8118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:07.557934', 'step': 8118, 'epoch': 2} {'type': 'loss', 'content': 0.13464923202991486, 'timestamp': '2025-09-10 02:44:07.560140', 'step': 8119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:07.614362', 'step': 8119, 'epoch': 2} {'type': 'loss', 'content': 0.12651614844799042, 'timestamp': '2025-09-10 02:44:07.620304', 'step': 8120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:07.673436', 'step': 8120, 'epoch': 2} {'type': 'loss', 'content': 0.15797634422779083, 'timestamp': '2025-09-10 02:44:07.675351', 'step': 8121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:07.728510', 'step': 8121, 'epoch': 2} {'type': 'loss', 'content': 0.1501048058271408, 'timestamp': '2025-09-10 02:44:07.730682', 'step': 8122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:07.784593', 'step': 8122, 'epoch': 2} {'type': 'loss', 'content': 0.22485707700252533, 'timestamp': '2025-09-10 02:44:07.786798', 'step': 8123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:07.842274', 'step': 8123, 'epoch': 2} {'type': 'loss', 'content': 0.14294545352458954, 'timestamp': '2025-09-10 02:44:07.848901', 'step': 8124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:07.903118', 'step': 8124, 'epoch': 2} {'type': 'loss', 'content': 0.1778416484594345, 'timestamp': '2025-09-10 02:44:07.905319', 'step': 8125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:07.960734', 'step': 8125, 'epoch': 2} {'type': 'loss', 'content': 0.12480912357568741, 'timestamp': '2025-09-10 02:44:07.962966', 'step': 8126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:08.017723', 'step': 8126, 'epoch': 2} {'type': 'loss', 'content': 0.2669748067855835, 'timestamp': '2025-09-10 02:44:08.019818', 'step': 8127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:08.074950', 'step': 8127, 'epoch': 2} {'type': 'loss', 'content': 0.16307319700717926, 'timestamp': '2025-09-10 02:44:08.081331', 'step': 8128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:08.134857', 'step': 8128, 'epoch': 2} {'type': 'loss', 'content': 0.268918514251709, 'timestamp': '2025-09-10 02:44:08.137159', 'step': 8129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:08.191988', 'step': 8129, 'epoch': 2} {'type': 'loss', 'content': 0.14721381664276123, 'timestamp': '2025-09-10 02:44:08.194100', 'step': 8130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:08.247905', 'step': 8130, 'epoch': 2} {'type': 'loss', 'content': 0.10356738418340683, 'timestamp': '2025-09-10 02:44:08.250046', 'step': 8131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:08.303651', 'step': 8131, 'epoch': 2} {'type': 'loss', 'content': 0.08743882924318314, 'timestamp': '2025-09-10 02:44:08.309881', 'step': 8132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:08.364328', 'step': 8132, 'epoch': 2} {'type': 'loss', 'content': 0.14478106796741486, 'timestamp': '2025-09-10 02:44:08.366259', 'step': 8133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:08.420235', 'step': 8133, 'epoch': 2} {'type': 'loss', 'content': 0.11814393103122711, 'timestamp': '2025-09-10 02:44:08.422368', 'step': 8134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:08.476457', 'step': 8134, 'epoch': 2} {'type': 'loss', 'content': 0.15848523378372192, 'timestamp': '2025-09-10 02:44:08.478682', 'step': 8135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:08.531090', 'step': 8135, 'epoch': 2} {'type': 'loss', 'content': 0.11138045787811279, 'timestamp': '2025-09-10 02:44:08.536829', 'step': 8136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:08.589753', 'step': 8136, 'epoch': 2} {'type': 'loss', 'content': 0.11548784375190735, 'timestamp': '2025-09-10 02:44:08.591933', 'step': 8137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:08.647250', 'step': 8137, 'epoch': 2} {'type': 'loss', 'content': 0.18649154901504517, 'timestamp': '2025-09-10 02:44:08.649620', 'step': 8138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:08.703505', 'step': 8138, 'epoch': 2} {'type': 'loss', 'content': 0.14783087372779846, 'timestamp': '2025-09-10 02:44:08.705716', 'step': 8139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:08.759149', 'step': 8139, 'epoch': 2} {'type': 'loss', 'content': 0.11690638214349747, 'timestamp': '2025-09-10 02:44:08.765340', 'step': 8140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:08.817921', 'step': 8140, 'epoch': 2} {'type': 'loss', 'content': 0.11687847971916199, 'timestamp': '2025-09-10 02:44:08.820026', 'step': 8141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:08.873848', 'step': 8141, 'epoch': 2} {'type': 'loss', 'content': 0.13316646218299866, 'timestamp': '2025-09-10 02:44:08.876016', 'step': 8142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:08.929425', 'step': 8142, 'epoch': 2} {'type': 'loss', 'content': 0.14987535774707794, 'timestamp': '2025-09-10 02:44:08.931549', 'step': 8143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:08.984293', 'step': 8143, 'epoch': 2} {'type': 'loss', 'content': 0.07659486681222916, 'timestamp': '2025-09-10 02:44:08.990323', 'step': 8144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:09.043617', 'step': 8144, 'epoch': 2} {'type': 'loss', 'content': 0.16630253195762634, 'timestamp': '2025-09-10 02:44:09.045717', 'step': 8145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:09.099490', 'step': 8145, 'epoch': 2} {'type': 'loss', 'content': 0.15463818609714508, 'timestamp': '2025-09-10 02:44:09.101661', 'step': 8146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:09.155897', 'step': 8146, 'epoch': 2} {'type': 'loss', 'content': 0.0992814302444458, 'timestamp': '2025-09-10 02:44:09.158098', 'step': 8147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:09.211405', 'step': 8147, 'epoch': 2} {'type': 'loss', 'content': 0.09994865953922272, 'timestamp': '2025-09-10 02:44:09.217461', 'step': 8148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:09.271338', 'step': 8148, 'epoch': 2} {'type': 'loss', 'content': 0.1503530591726303, 'timestamp': '2025-09-10 02:44:09.273460', 'step': 8149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:09.328094', 'step': 8149, 'epoch': 2} {'type': 'loss', 'content': 0.18720757961273193, 'timestamp': '2025-09-10 02:44:09.330267', 'step': 8150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:09.385048', 'step': 8150, 'epoch': 2} {'type': 'loss', 'content': 0.09840617328882217, 'timestamp': '2025-09-10 02:44:09.387225', 'step': 8151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:09.441067', 'step': 8151, 'epoch': 2} {'type': 'loss', 'content': 0.16352465748786926, 'timestamp': '2025-09-10 02:44:09.447424', 'step': 8152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:09.501932', 'step': 8152, 'epoch': 2} {'type': 'loss', 'content': 0.18317505717277527, 'timestamp': '2025-09-10 02:44:09.504403', 'step': 8153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:09.557825', 'step': 8153, 'epoch': 2} {'type': 'loss', 'content': 0.2580074667930603, 'timestamp': '2025-09-10 02:44:09.560151', 'step': 8154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:09.613877', 'step': 8154, 'epoch': 2} {'type': 'loss', 'content': 0.2008737325668335, 'timestamp': '2025-09-10 02:44:09.616072', 'step': 8155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:09.670227', 'step': 8155, 'epoch': 2} {'type': 'loss', 'content': 0.1287316381931305, 'timestamp': '2025-09-10 02:44:09.676696', 'step': 8156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:09.729641', 'step': 8156, 'epoch': 2} {'type': 'loss', 'content': 0.13202281296253204, 'timestamp': '2025-09-10 02:44:09.731790', 'step': 8157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:09.785860', 'step': 8157, 'epoch': 2} {'type': 'loss', 'content': 0.15327145159244537, 'timestamp': '2025-09-10 02:44:09.788060', 'step': 8158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:09.841645', 'step': 8158, 'epoch': 2} {'type': 'loss', 'content': 0.16432976722717285, 'timestamp': '2025-09-10 02:44:09.843796', 'step': 8159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:09.897906', 'step': 8159, 'epoch': 2} {'type': 'loss', 'content': 0.08122128993272781, 'timestamp': '2025-09-10 02:44:09.904455', 'step': 8160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:09.959228', 'step': 8160, 'epoch': 2} {'type': 'loss', 'content': 0.08964037895202637, 'timestamp': '2025-09-10 02:44:09.961488', 'step': 8161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:10.015234', 'step': 8161, 'epoch': 2} {'type': 'loss', 'content': 0.06235980987548828, 'timestamp': '2025-09-10 02:44:10.017557', 'step': 8162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:10.071318', 'step': 8162, 'epoch': 2} {'type': 'loss', 'content': 0.16728541254997253, 'timestamp': '2025-09-10 02:44:10.073302', 'step': 8163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:10.128645', 'step': 8163, 'epoch': 2} {'type': 'loss', 'content': 0.23357141017913818, 'timestamp': '2025-09-10 02:44:10.135108', 'step': 8164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:10.190640', 'step': 8164, 'epoch': 2} {'type': 'loss', 'content': 0.1262439787387848, 'timestamp': '2025-09-10 02:44:10.192781', 'step': 8165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:10.246113', 'step': 8165, 'epoch': 2} {'type': 'loss', 'content': 0.1920442283153534, 'timestamp': '2025-09-10 02:44:10.248598', 'step': 8166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:10.302025', 'step': 8166, 'epoch': 2} {'type': 'loss', 'content': 0.2572534382343292, 'timestamp': '2025-09-10 02:44:10.304361', 'step': 8167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:10.357507', 'step': 8167, 'epoch': 2} {'type': 'loss', 'content': 0.13554608821868896, 'timestamp': '2025-09-10 02:44:10.363417', 'step': 8168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:10.416237', 'step': 8168, 'epoch': 2} {'type': 'loss', 'content': 0.09406445175409317, 'timestamp': '2025-09-10 02:44:10.418431', 'step': 8169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:10.471655', 'step': 8169, 'epoch': 2} {'type': 'loss', 'content': 0.13566003739833832, 'timestamp': '2025-09-10 02:44:10.473833', 'step': 8170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:10.527353', 'step': 8170, 'epoch': 2} {'type': 'loss', 'content': 0.16364894807338715, 'timestamp': '2025-09-10 02:44:10.529554', 'step': 8171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:10.583119', 'step': 8171, 'epoch': 2} {'type': 'loss', 'content': 0.1205371543765068, 'timestamp': '2025-09-10 02:44:10.588963', 'step': 8172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:10.642381', 'step': 8172, 'epoch': 2} {'type': 'loss', 'content': 0.1592446118593216, 'timestamp': '2025-09-10 02:44:10.644446', 'step': 8173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:10.697849', 'step': 8173, 'epoch': 2} {'type': 'loss', 'content': 0.14937834441661835, 'timestamp': '2025-09-10 02:44:10.700073', 'step': 8174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:10.754193', 'step': 8174, 'epoch': 2} {'type': 'loss', 'content': 0.09147226810455322, 'timestamp': '2025-09-10 02:44:10.756195', 'step': 8175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:10.809500', 'step': 8175, 'epoch': 2} {'type': 'loss', 'content': 0.10276097059249878, 'timestamp': '2025-09-10 02:44:10.815352', 'step': 8176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:10.868279', 'step': 8176, 'epoch': 2} {'type': 'loss', 'content': 0.08289433270692825, 'timestamp': '2025-09-10 02:44:10.870253', 'step': 8177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:10.923302', 'step': 8177, 'epoch': 2} {'type': 'loss', 'content': 0.16605131328105927, 'timestamp': '2025-09-10 02:44:10.925327', 'step': 8178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:10.977831', 'step': 8178, 'epoch': 2} {'type': 'loss', 'content': 0.17086641490459442, 'timestamp': '2025-09-10 02:44:10.979829', 'step': 8179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:11.032535', 'step': 8179, 'epoch': 2} {'type': 'loss', 'content': 0.05881344527006149, 'timestamp': '2025-09-10 02:44:11.038571', 'step': 8180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:11.090879', 'step': 8180, 'epoch': 2} {'type': 'loss', 'content': 0.08817444741725922, 'timestamp': '2025-09-10 02:44:11.093078', 'step': 8181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:11.146141', 'step': 8181, 'epoch': 2} {'type': 'loss', 'content': 0.16804762184619904, 'timestamp': '2025-09-10 02:44:11.148204', 'step': 8182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:11.202450', 'step': 8182, 'epoch': 2} {'type': 'loss', 'content': 0.17466707527637482, 'timestamp': '2025-09-10 02:44:11.204578', 'step': 8183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:11.257796', 'step': 8183, 'epoch': 2} {'type': 'loss', 'content': 0.04078950732946396, 'timestamp': '2025-09-10 02:44:11.263806', 'step': 8184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:11.316685', 'step': 8184, 'epoch': 2} {'type': 'loss', 'content': 0.1519346535205841, 'timestamp': '2025-09-10 02:44:11.318684', 'step': 8185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:11.371607', 'step': 8185, 'epoch': 2} {'type': 'loss', 'content': 0.16209371387958527, 'timestamp': '2025-09-10 02:44:11.373585', 'step': 8186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:11.429029', 'step': 8186, 'epoch': 2} {'type': 'loss', 'content': 0.16033245623111725, 'timestamp': '2025-09-10 02:44:11.431171', 'step': 8187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:11.489476', 'step': 8187, 'epoch': 2} {'type': 'loss', 'content': 0.07472556829452515, 'timestamp': '2025-09-10 02:44:11.495452', 'step': 8188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:11.548088', 'step': 8188, 'epoch': 2} {'type': 'loss', 'content': 0.13763664662837982, 'timestamp': '2025-09-10 02:44:11.554447', 'step': 8189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:11.608417', 'step': 8189, 'epoch': 2} {'type': 'loss', 'content': 0.17920641601085663, 'timestamp': '2025-09-10 02:44:11.610509', 'step': 8190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:11.667084', 'step': 8190, 'epoch': 2} {'type': 'loss', 'content': 0.146804541349411, 'timestamp': '2025-09-10 02:44:11.669774', 'step': 8191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:11.723046', 'step': 8191, 'epoch': 2} {'type': 'loss', 'content': 0.10529535263776779, 'timestamp': '2025-09-10 02:44:11.728612', 'step': 8192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:11.782702', 'step': 8192, 'epoch': 2} {'type': 'loss', 'content': 0.1629611700773239, 'timestamp': '2025-09-10 02:44:11.784816', 'step': 8193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:11.839237', 'step': 8193, 'epoch': 2} {'type': 'loss', 'content': 0.20837394893169403, 'timestamp': '2025-09-10 02:44:11.847226', 'step': 8194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:11.901476', 'step': 8194, 'epoch': 2} {'type': 'loss', 'content': 0.22757193446159363, 'timestamp': '2025-09-10 02:44:11.903739', 'step': 8195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:11.956335', 'step': 8195, 'epoch': 2} {'type': 'loss', 'content': 0.106936976313591, 'timestamp': '2025-09-10 02:44:11.965309', 'step': 8196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:12.027913', 'step': 8196, 'epoch': 2} {'type': 'loss', 'content': 0.12072654068470001, 'timestamp': '2025-09-10 02:44:12.030124', 'step': 8197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:12.083639', 'step': 8197, 'epoch': 2} {'type': 'loss', 'content': 0.11639443039894104, 'timestamp': '2025-09-10 02:44:12.086137', 'step': 8198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:12.139190', 'step': 8198, 'epoch': 2} {'type': 'loss', 'content': 0.14186888933181763, 'timestamp': '2025-09-10 02:44:12.141548', 'step': 8199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:12.194971', 'step': 8199, 'epoch': 2} {'type': 'loss', 'content': 0.05325452610850334, 'timestamp': '2025-09-10 02:44:12.201138', 'step': 8200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:12.254651', 'step': 8200, 'epoch': 2} {'type': 'loss', 'content': 0.13290056586265564, 'timestamp': '2025-09-10 02:44:12.257017', 'step': 8201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:12.310621', 'step': 8201, 'epoch': 2} {'type': 'loss', 'content': 0.11490299552679062, 'timestamp': '2025-09-10 02:44:12.312703', 'step': 8202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:12.367734', 'step': 8202, 'epoch': 2} {'type': 'loss', 'content': 0.23363564908504486, 'timestamp': '2025-09-10 02:44:12.370149', 'step': 8203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:12.423329', 'step': 8203, 'epoch': 2} {'type': 'loss', 'content': 0.17009755969047546, 'timestamp': '2025-09-10 02:44:12.429180', 'step': 8204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:12.481233', 'step': 8204, 'epoch': 2} {'type': 'loss', 'content': 0.06994155794382095, 'timestamp': '2025-09-10 02:44:12.483664', 'step': 8205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:12.536757', 'step': 8205, 'epoch': 2} {'type': 'loss', 'content': 0.12062086164951324, 'timestamp': '2025-09-10 02:44:12.539026', 'step': 8206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:12.592551', 'step': 8206, 'epoch': 2} {'type': 'loss', 'content': 0.18355154991149902, 'timestamp': '2025-09-10 02:44:12.594925', 'step': 8207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:12.648459', 'step': 8207, 'epoch': 2} {'type': 'loss', 'content': 0.16171301901340485, 'timestamp': '2025-09-10 02:44:12.654329', 'step': 8208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:12.707137', 'step': 8208, 'epoch': 2} {'type': 'loss', 'content': 0.1415352076292038, 'timestamp': '2025-09-10 02:44:12.709180', 'step': 8209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:12.762169', 'step': 8209, 'epoch': 2} {'type': 'loss', 'content': 0.17369136214256287, 'timestamp': '2025-09-10 02:44:12.764215', 'step': 8210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:12.818063', 'step': 8210, 'epoch': 2} {'type': 'loss', 'content': 0.19200827181339264, 'timestamp': '2025-09-10 02:44:12.820397', 'step': 8211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:12.874855', 'step': 8211, 'epoch': 2} {'type': 'loss', 'content': 0.2107071727514267, 'timestamp': '2025-09-10 02:44:12.880947', 'step': 8212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:12.934300', 'step': 8212, 'epoch': 2} {'type': 'loss', 'content': 0.13549405336380005, 'timestamp': '2025-09-10 02:44:12.936547', 'step': 8213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:12.989600', 'step': 8213, 'epoch': 2} {'type': 'loss', 'content': 0.20748969912528992, 'timestamp': '2025-09-10 02:44:12.991822', 'step': 8214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:13.045947', 'step': 8214, 'epoch': 2} {'type': 'loss', 'content': 0.12923242151737213, 'timestamp': '2025-09-10 02:44:13.048119', 'step': 8215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:13.101797', 'step': 8215, 'epoch': 2} {'type': 'loss', 'content': 0.12456995993852615, 'timestamp': '2025-09-10 02:44:13.107948', 'step': 8216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:13.173839', 'step': 8216, 'epoch': 2} {'type': 'loss', 'content': 0.16322535276412964, 'timestamp': '2025-09-10 02:44:13.176188', 'step': 8217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:13.230933', 'step': 8217, 'epoch': 2} {'type': 'loss', 'content': 0.29486221075057983, 'timestamp': '2025-09-10 02:44:13.233111', 'step': 8218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:13.286612', 'step': 8218, 'epoch': 2} {'type': 'loss', 'content': 0.05829790607094765, 'timestamp': '2025-09-10 02:44:13.288755', 'step': 8219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:13.342719', 'step': 8219, 'epoch': 2} {'type': 'loss', 'content': 0.10693421959877014, 'timestamp': '2025-09-10 02:44:13.349073', 'step': 8220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:13.402778', 'step': 8220, 'epoch': 2} {'type': 'loss', 'content': 0.09487838298082352, 'timestamp': '2025-09-10 02:44:13.405000', 'step': 8221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:13.458520', 'step': 8221, 'epoch': 2} {'type': 'loss', 'content': 0.12304972112178802, 'timestamp': '2025-09-10 02:44:13.460773', 'step': 8222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:13.514825', 'step': 8222, 'epoch': 2} {'type': 'loss', 'content': 0.19929338991641998, 'timestamp': '2025-09-10 02:44:13.516742', 'step': 8223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:13.570321', 'step': 8223, 'epoch': 2} {'type': 'loss', 'content': 0.08277178555727005, 'timestamp': '2025-09-10 02:44:13.576526', 'step': 8224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:13.628646', 'step': 8224, 'epoch': 2} {'type': 'loss', 'content': 0.23860782384872437, 'timestamp': '2025-09-10 02:44:13.630888', 'step': 8225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:13.683963', 'step': 8225, 'epoch': 2} {'type': 'loss', 'content': 0.17518708109855652, 'timestamp': '2025-09-10 02:44:13.686100', 'step': 8226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:13.742061', 'step': 8226, 'epoch': 2} {'type': 'loss', 'content': 0.07911071181297302, 'timestamp': '2025-09-10 02:44:13.744110', 'step': 8227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:13.799243', 'step': 8227, 'epoch': 2} {'type': 'loss', 'content': 0.1618267297744751, 'timestamp': '2025-09-10 02:44:13.805144', 'step': 8228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:13.861158', 'step': 8228, 'epoch': 2} {'type': 'loss', 'content': 0.18789543211460114, 'timestamp': '2025-09-10 02:44:13.863807', 'step': 8229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:13.920757', 'step': 8229, 'epoch': 2} {'type': 'loss', 'content': 0.11024869978427887, 'timestamp': '2025-09-10 02:44:13.922929', 'step': 8230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:13.977398', 'step': 8230, 'epoch': 2} {'type': 'loss', 'content': 0.11611635982990265, 'timestamp': '2025-09-10 02:44:13.979358', 'step': 8231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:14.032506', 'step': 8231, 'epoch': 2} {'type': 'loss', 'content': 0.05357592552900314, 'timestamp': '2025-09-10 02:44:14.038734', 'step': 8232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:14.092752', 'step': 8232, 'epoch': 2} {'type': 'loss', 'content': 0.13309124112129211, 'timestamp': '2025-09-10 02:44:14.094940', 'step': 8233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:14.149158', 'step': 8233, 'epoch': 2} {'type': 'loss', 'content': 0.14178061485290527, 'timestamp': '2025-09-10 02:44:14.151523', 'step': 8234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:14.206347', 'step': 8234, 'epoch': 2} {'type': 'loss', 'content': 0.08262558281421661, 'timestamp': '2025-09-10 02:44:14.208489', 'step': 8235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:14.263190', 'step': 8235, 'epoch': 2} {'type': 'loss', 'content': 0.1469375044107437, 'timestamp': '2025-09-10 02:44:14.269557', 'step': 8236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:14.322649', 'step': 8236, 'epoch': 2} {'type': 'loss', 'content': 0.20198458433151245, 'timestamp': '2025-09-10 02:44:14.324828', 'step': 8237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:14.378236', 'step': 8237, 'epoch': 2} {'type': 'loss', 'content': 0.05203816667199135, 'timestamp': '2025-09-10 02:44:14.380384', 'step': 8238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:14.433632', 'step': 8238, 'epoch': 2} {'type': 'loss', 'content': 0.17066490650177002, 'timestamp': '2025-09-10 02:44:14.435910', 'step': 8239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:14.489604', 'step': 8239, 'epoch': 2} {'type': 'loss', 'content': 0.1973308026790619, 'timestamp': '2025-09-10 02:44:14.496141', 'step': 8240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:14.549383', 'step': 8240, 'epoch': 2} {'type': 'loss', 'content': 0.06607002764940262, 'timestamp': '2025-09-10 02:44:14.551693', 'step': 8241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:14.605319', 'step': 8241, 'epoch': 2} {'type': 'loss', 'content': 0.11498069763183594, 'timestamp': '2025-09-10 02:44:14.607569', 'step': 8242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:14.661813', 'step': 8242, 'epoch': 2} {'type': 'loss', 'content': 0.15137438476085663, 'timestamp': '2025-09-10 02:44:14.664233', 'step': 8243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:14.718128', 'step': 8243, 'epoch': 2} {'type': 'loss', 'content': 0.14842894673347473, 'timestamp': '2025-09-10 02:44:14.724259', 'step': 8244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:14.777404', 'step': 8244, 'epoch': 2} {'type': 'loss', 'content': 0.12171602249145508, 'timestamp': '2025-09-10 02:44:14.779575', 'step': 8245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:14.832640', 'step': 8245, 'epoch': 2} {'type': 'loss', 'content': 0.21642781794071198, 'timestamp': '2025-09-10 02:44:14.834824', 'step': 8246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:14.888079', 'step': 8246, 'epoch': 2} {'type': 'loss', 'content': 0.16255617141723633, 'timestamp': '2025-09-10 02:44:14.890033', 'step': 8247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:14.943067', 'step': 8247, 'epoch': 2} {'type': 'loss', 'content': 0.10341856628656387, 'timestamp': '2025-09-10 02:44:14.949084', 'step': 8248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:15.002209', 'step': 8248, 'epoch': 2} {'type': 'loss', 'content': 0.09660252928733826, 'timestamp': '2025-09-10 02:44:15.004717', 'step': 8249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:15.059206', 'step': 8249, 'epoch': 2} {'type': 'loss', 'content': 0.20225295424461365, 'timestamp': '2025-09-10 02:44:15.061471', 'step': 8250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:15.115206', 'step': 8250, 'epoch': 2} {'type': 'loss', 'content': 0.16467958688735962, 'timestamp': '2025-09-10 02:44:15.117491', 'step': 8251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:15.171477', 'step': 8251, 'epoch': 2} {'type': 'loss', 'content': 0.11751408129930496, 'timestamp': '2025-09-10 02:44:15.177701', 'step': 8252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:15.230682', 'step': 8252, 'epoch': 2} {'type': 'loss', 'content': 0.12102604657411575, 'timestamp': '2025-09-10 02:44:15.232839', 'step': 8253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:15.287255', 'step': 8253, 'epoch': 2} {'type': 'loss', 'content': 0.19512170553207397, 'timestamp': '2025-09-10 02:44:15.289665', 'step': 8254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:15.343680', 'step': 8254, 'epoch': 2} {'type': 'loss', 'content': 0.09141877293586731, 'timestamp': '2025-09-10 02:44:15.346052', 'step': 8255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:15.400574', 'step': 8255, 'epoch': 2} {'type': 'loss', 'content': 0.11831987649202347, 'timestamp': '2025-09-10 02:44:15.406642', 'step': 8256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:15.459953', 'step': 8256, 'epoch': 2} {'type': 'loss', 'content': 0.06827981024980545, 'timestamp': '2025-09-10 02:44:15.462286', 'step': 8257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:15.516306', 'step': 8257, 'epoch': 2} {'type': 'loss', 'content': 0.08373863995075226, 'timestamp': '2025-09-10 02:44:15.518466', 'step': 8258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:15.572897', 'step': 8258, 'epoch': 2} {'type': 'loss', 'content': 0.1860569268465042, 'timestamp': '2025-09-10 02:44:15.575140', 'step': 8259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:15.628678', 'step': 8259, 'epoch': 2} {'type': 'loss', 'content': 0.1321118026971817, 'timestamp': '2025-09-10 02:44:15.634795', 'step': 8260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:15.688435', 'step': 8260, 'epoch': 2} {'type': 'loss', 'content': 0.13874760270118713, 'timestamp': '2025-09-10 02:44:15.690598', 'step': 8261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:15.744181', 'step': 8261, 'epoch': 2} {'type': 'loss', 'content': 0.11079978942871094, 'timestamp': '2025-09-10 02:44:15.746338', 'step': 8262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:15.800983', 'step': 8262, 'epoch': 2} {'type': 'loss', 'content': 0.2198154181241989, 'timestamp': '2025-09-10 02:44:15.803204', 'step': 8263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:15.857701', 'step': 8263, 'epoch': 2} {'type': 'loss', 'content': 0.09050672501325607, 'timestamp': '2025-09-10 02:44:15.863847', 'step': 8264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:15.917917', 'step': 8264, 'epoch': 2} {'type': 'loss', 'content': 0.05974704027175903, 'timestamp': '2025-09-10 02:44:15.920026', 'step': 8265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:15.974296', 'step': 8265, 'epoch': 2} {'type': 'loss', 'content': 0.14469675719738007, 'timestamp': '2025-09-10 02:44:15.976678', 'step': 8266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:16.030984', 'step': 8266, 'epoch': 2} {'type': 'loss', 'content': 0.1681896448135376, 'timestamp': '2025-09-10 02:44:16.033064', 'step': 8267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:16.087662', 'step': 8267, 'epoch': 2} {'type': 'loss', 'content': 0.17314691841602325, 'timestamp': '2025-09-10 02:44:16.094212', 'step': 8268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:16.148359', 'step': 8268, 'epoch': 2} {'type': 'loss', 'content': 0.18290108442306519, 'timestamp': '2025-09-10 02:44:16.150733', 'step': 8269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:16.205546', 'step': 8269, 'epoch': 2} {'type': 'loss', 'content': 0.14143958687782288, 'timestamp': '2025-09-10 02:44:16.207956', 'step': 8270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:16.262007', 'step': 8270, 'epoch': 2} {'type': 'loss', 'content': 0.13453611731529236, 'timestamp': '2025-09-10 02:44:16.264092', 'step': 8271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:16.319816', 'step': 8271, 'epoch': 2} {'type': 'loss', 'content': 0.19791723787784576, 'timestamp': '2025-09-10 02:44:16.326152', 'step': 8272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:16.380295', 'step': 8272, 'epoch': 2} {'type': 'loss', 'content': 0.08142745494842529, 'timestamp': '2025-09-10 02:44:16.383583', 'step': 8273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:16.438027', 'step': 8273, 'epoch': 2} {'type': 'loss', 'content': 0.14627669751644135, 'timestamp': '2025-09-10 02:44:16.440276', 'step': 8274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:16.494938', 'step': 8274, 'epoch': 2} {'type': 'loss', 'content': 0.1024254560470581, 'timestamp': '2025-09-10 02:44:16.497058', 'step': 8275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:16.555724', 'step': 8275, 'epoch': 2} {'type': 'loss', 'content': 0.1360747069120407, 'timestamp': '2025-09-10 02:44:16.561855', 'step': 8276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:16.616093', 'step': 8276, 'epoch': 2} {'type': 'loss', 'content': 0.17655417323112488, 'timestamp': '2025-09-10 02:44:16.618305', 'step': 8277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:16.674388', 'step': 8277, 'epoch': 2} {'type': 'loss', 'content': 0.14713065326213837, 'timestamp': '2025-09-10 02:44:16.676634', 'step': 8278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:16.730817', 'step': 8278, 'epoch': 2} {'type': 'loss', 'content': 0.12767180800437927, 'timestamp': '2025-09-10 02:44:16.735654', 'step': 8279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:16.790495', 'step': 8279, 'epoch': 2} {'type': 'loss', 'content': 0.11118609458208084, 'timestamp': '2025-09-10 02:44:16.797036', 'step': 8280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:16.855243', 'step': 8280, 'epoch': 2} {'type': 'loss', 'content': 0.19236062467098236, 'timestamp': '2025-09-10 02:44:16.857602', 'step': 8281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:16.916033', 'step': 8281, 'epoch': 2} {'type': 'loss', 'content': 0.1101352721452713, 'timestamp': '2025-09-10 02:44:16.918476', 'step': 8282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:16.976706', 'step': 8282, 'epoch': 2} {'type': 'loss', 'content': 0.09876422584056854, 'timestamp': '2025-09-10 02:44:16.980939', 'step': 8283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:17.035571', 'step': 8283, 'epoch': 2} {'type': 'loss', 'content': 0.10870964080095291, 'timestamp': '2025-09-10 02:44:17.041970', 'step': 8284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:17.096628', 'step': 8284, 'epoch': 2} {'type': 'loss', 'content': 0.19374217092990875, 'timestamp': '2025-09-10 02:44:17.098900', 'step': 8285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:17.155182', 'step': 8285, 'epoch': 2} {'type': 'loss', 'content': 0.09518780559301376, 'timestamp': '2025-09-10 02:44:17.157426', 'step': 8286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:17.213729', 'step': 8286, 'epoch': 2} {'type': 'loss', 'content': 0.2757560908794403, 'timestamp': '2025-09-10 02:44:17.215889', 'step': 8287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:17.270108', 'step': 8287, 'epoch': 2} {'type': 'loss', 'content': 0.15353572368621826, 'timestamp': '2025-09-10 02:44:17.276378', 'step': 8288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:17.331606', 'step': 8288, 'epoch': 2} {'type': 'loss', 'content': 0.10364726185798645, 'timestamp': '2025-09-10 02:44:17.333814', 'step': 8289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:17.387440', 'step': 8289, 'epoch': 2} {'type': 'loss', 'content': 0.11314176023006439, 'timestamp': '2025-09-10 02:44:17.389639', 'step': 8290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:17.443468', 'step': 8290, 'epoch': 2} {'type': 'loss', 'content': 0.20503942668437958, 'timestamp': '2025-09-10 02:44:17.445683', 'step': 8291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:17.500419', 'step': 8291, 'epoch': 2} {'type': 'loss', 'content': 0.14477665722370148, 'timestamp': '2025-09-10 02:44:17.506791', 'step': 8292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:17.560675', 'step': 8292, 'epoch': 2} {'type': 'loss', 'content': 0.0802595317363739, 'timestamp': '2025-09-10 02:44:17.562853', 'step': 8293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:17.616363', 'step': 8293, 'epoch': 2} {'type': 'loss', 'content': 0.12015809863805771, 'timestamp': '2025-09-10 02:44:17.618483', 'step': 8294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:17.671835', 'step': 8294, 'epoch': 2} {'type': 'loss', 'content': 0.09420457482337952, 'timestamp': '2025-09-10 02:44:17.674082', 'step': 8295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:17.728172', 'step': 8295, 'epoch': 2} {'type': 'loss', 'content': 0.1098044291138649, 'timestamp': '2025-09-10 02:44:17.734324', 'step': 8296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:17.789931', 'step': 8296, 'epoch': 2} {'type': 'loss', 'content': 0.21267347037792206, 'timestamp': '2025-09-10 02:44:17.792465', 'step': 8297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:17.845738', 'step': 8297, 'epoch': 2} {'type': 'loss', 'content': 0.13305173814296722, 'timestamp': '2025-09-10 02:44:17.848025', 'step': 8298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:17.900356', 'step': 8298, 'epoch': 2} {'type': 'loss', 'content': 0.25054606795310974, 'timestamp': '2025-09-10 02:44:17.902491', 'step': 8299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:17.955437', 'step': 8299, 'epoch': 2} {'type': 'loss', 'content': 0.15011562407016754, 'timestamp': '2025-09-10 02:44:17.961595', 'step': 8300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:18.014532', 'step': 8300, 'epoch': 2} {'type': 'loss', 'content': 0.15819844603538513, 'timestamp': '2025-09-10 02:44:18.016704', 'step': 8301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:18.070617', 'step': 8301, 'epoch': 2} {'type': 'loss', 'content': 0.08967045694589615, 'timestamp': '2025-09-10 02:44:18.072826', 'step': 8302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:18.126141', 'step': 8302, 'epoch': 2} {'type': 'loss', 'content': 0.17862719297409058, 'timestamp': '2025-09-10 02:44:18.128452', 'step': 8303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:18.182219', 'step': 8303, 'epoch': 2} {'type': 'loss', 'content': 0.16920460760593414, 'timestamp': '2025-09-10 02:44:18.188478', 'step': 8304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:18.241852', 'step': 8304, 'epoch': 2} {'type': 'loss', 'content': 0.18105866014957428, 'timestamp': '2025-09-10 02:44:18.244118', 'step': 8305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:18.297319', 'step': 8305, 'epoch': 2} {'type': 'loss', 'content': 0.15003296732902527, 'timestamp': '2025-09-10 02:44:18.299584', 'step': 8306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:18.353212', 'step': 8306, 'epoch': 2} {'type': 'loss', 'content': 0.1226273700594902, 'timestamp': '2025-09-10 02:44:18.355518', 'step': 8307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:18.409886', 'step': 8307, 'epoch': 2} {'type': 'loss', 'content': 0.1458338052034378, 'timestamp': '2025-09-10 02:44:18.416015', 'step': 8308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:18.469005', 'step': 8308, 'epoch': 2} {'type': 'loss', 'content': 0.12447266280651093, 'timestamp': '2025-09-10 02:44:18.471303', 'step': 8309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:18.524661', 'step': 8309, 'epoch': 2} {'type': 'loss', 'content': 0.11378955841064453, 'timestamp': '2025-09-10 02:44:18.526911', 'step': 8310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:18.580695', 'step': 8310, 'epoch': 2} {'type': 'loss', 'content': 0.18402791023254395, 'timestamp': '2025-09-10 02:44:18.582821', 'step': 8311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:18.636727', 'step': 8311, 'epoch': 2} {'type': 'loss', 'content': 0.13570812344551086, 'timestamp': '2025-09-10 02:44:18.643089', 'step': 8312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:18.696180', 'step': 8312, 'epoch': 2} {'type': 'loss', 'content': 0.05463273450732231, 'timestamp': '2025-09-10 02:44:18.698395', 'step': 8313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:18.752099', 'step': 8313, 'epoch': 2} {'type': 'loss', 'content': 0.1337314248085022, 'timestamp': '2025-09-10 02:44:18.754333', 'step': 8314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:18.808397', 'step': 8314, 'epoch': 2} {'type': 'loss', 'content': 0.16007274389266968, 'timestamp': '2025-09-10 02:44:18.810606', 'step': 8315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:18.864048', 'step': 8315, 'epoch': 2} {'type': 'loss', 'content': 0.1337520331144333, 'timestamp': '2025-09-10 02:44:18.870078', 'step': 8316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:18.922889', 'step': 8316, 'epoch': 2} {'type': 'loss', 'content': 0.1818273961544037, 'timestamp': '2025-09-10 02:44:18.925131', 'step': 8317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:18.978977', 'step': 8317, 'epoch': 2} {'type': 'loss', 'content': 0.1838666945695877, 'timestamp': '2025-09-10 02:44:18.981266', 'step': 8318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:19.034910', 'step': 8318, 'epoch': 2} {'type': 'loss', 'content': 0.08396304398775101, 'timestamp': '2025-09-10 02:44:19.037281', 'step': 8319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:19.090976', 'step': 8319, 'epoch': 2} {'type': 'loss', 'content': 0.16440987586975098, 'timestamp': '2025-09-10 02:44:19.097335', 'step': 8320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:19.150727', 'step': 8320, 'epoch': 2} {'type': 'loss', 'content': 0.08944476395845413, 'timestamp': '2025-09-10 02:44:19.152812', 'step': 8321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:19.207806', 'step': 8321, 'epoch': 2} {'type': 'loss', 'content': 0.07185154408216476, 'timestamp': '2025-09-10 02:44:19.209760', 'step': 8322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:19.264061', 'step': 8322, 'epoch': 2} {'type': 'loss', 'content': 0.2548692226409912, 'timestamp': '2025-09-10 02:44:19.266299', 'step': 8323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:19.319467', 'step': 8323, 'epoch': 2} {'type': 'loss', 'content': 0.09264805912971497, 'timestamp': '2025-09-10 02:44:19.325492', 'step': 8324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:19.378305', 'step': 8324, 'epoch': 2} {'type': 'loss', 'content': 0.17860499024391174, 'timestamp': '2025-09-10 02:44:19.380579', 'step': 8325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:19.436715', 'step': 8325, 'epoch': 2} {'type': 'loss', 'content': 0.19095754623413086, 'timestamp': '2025-09-10 02:44:19.438969', 'step': 8326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:19.493244', 'step': 8326, 'epoch': 2} {'type': 'loss', 'content': 0.11797168850898743, 'timestamp': '2025-09-10 02:44:19.495595', 'step': 8327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:19.550299', 'step': 8327, 'epoch': 2} {'type': 'loss', 'content': 0.21393398940563202, 'timestamp': '2025-09-10 02:44:19.556419', 'step': 8328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:19.611196', 'step': 8328, 'epoch': 2} {'type': 'loss', 'content': 0.1326500028371811, 'timestamp': '2025-09-10 02:44:19.613196', 'step': 8329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:19.667091', 'step': 8329, 'epoch': 2} {'type': 'loss', 'content': 0.1667591780424118, 'timestamp': '2025-09-10 02:44:19.669192', 'step': 8330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:19.723305', 'step': 8330, 'epoch': 2} {'type': 'loss', 'content': 0.05167181417346001, 'timestamp': '2025-09-10 02:44:19.725516', 'step': 8331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:19.779567', 'step': 8331, 'epoch': 2} {'type': 'loss', 'content': 0.13303877413272858, 'timestamp': '2025-09-10 02:44:19.785590', 'step': 8332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:19.839996', 'step': 8332, 'epoch': 2} {'type': 'loss', 'content': 0.12255747616291046, 'timestamp': '2025-09-10 02:44:19.842071', 'step': 8333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:19.896659', 'step': 8333, 'epoch': 2} {'type': 'loss', 'content': 0.24910283088684082, 'timestamp': '2025-09-10 02:44:19.898642', 'step': 8334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:19.952911', 'step': 8334, 'epoch': 2} {'type': 'loss', 'content': 0.11931765079498291, 'timestamp': '2025-09-10 02:44:19.954878', 'step': 8335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:20.008995', 'step': 8335, 'epoch': 2} {'type': 'loss', 'content': 0.15238608419895172, 'timestamp': '2025-09-10 02:44:20.015258', 'step': 8336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:20.068414', 'step': 8336, 'epoch': 2} {'type': 'loss', 'content': 0.12183844298124313, 'timestamp': '2025-09-10 02:44:20.070638', 'step': 8337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:20.124628', 'step': 8337, 'epoch': 2} {'type': 'loss', 'content': 0.17541924118995667, 'timestamp': '2025-09-10 02:44:20.126801', 'step': 8338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:20.180934', 'step': 8338, 'epoch': 2} {'type': 'loss', 'content': 0.12482992559671402, 'timestamp': '2025-09-10 02:44:20.183096', 'step': 8339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:20.237249', 'step': 8339, 'epoch': 2} {'type': 'loss', 'content': 0.10843200236558914, 'timestamp': '2025-09-10 02:44:20.243382', 'step': 8340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:20.305862', 'step': 8340, 'epoch': 2} {'type': 'loss', 'content': 0.10981462895870209, 'timestamp': '2025-09-10 02:44:20.308177', 'step': 8341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:20.363166', 'step': 8341, 'epoch': 2} {'type': 'loss', 'content': 0.1709488034248352, 'timestamp': '2025-09-10 02:44:20.366865', 'step': 8342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:20.425784', 'step': 8342, 'epoch': 2} {'type': 'loss', 'content': 0.1415202021598816, 'timestamp': '2025-09-10 02:44:20.427880', 'step': 8343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:20.481593', 'step': 8343, 'epoch': 2} {'type': 'loss', 'content': 0.15129271149635315, 'timestamp': '2025-09-10 02:44:20.487980', 'step': 8344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:20.547253', 'step': 8344, 'epoch': 2} {'type': 'loss', 'content': 0.1301574558019638, 'timestamp': '2025-09-10 02:44:20.549614', 'step': 8345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:20.604206', 'step': 8345, 'epoch': 2} {'type': 'loss', 'content': 0.27950847148895264, 'timestamp': '2025-09-10 02:44:20.606451', 'step': 8346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:20.662076', 'step': 8346, 'epoch': 2} {'type': 'loss', 'content': 0.18147532641887665, 'timestamp': '2025-09-10 02:44:20.664322', 'step': 8347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:20.718730', 'step': 8347, 'epoch': 2} {'type': 'loss', 'content': 0.20079180598258972, 'timestamp': '2025-09-10 02:44:20.724935', 'step': 8348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:20.779046', 'step': 8348, 'epoch': 2} {'type': 'loss', 'content': 0.22937150299549103, 'timestamp': '2025-09-10 02:44:20.781271', 'step': 8349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:20.836672', 'step': 8349, 'epoch': 2} {'type': 'loss', 'content': 0.06222885102033615, 'timestamp': '2025-09-10 02:44:20.840733', 'step': 8350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:20.896475', 'step': 8350, 'epoch': 2} {'type': 'loss', 'content': 0.15435674786567688, 'timestamp': '2025-09-10 02:44:20.898640', 'step': 8351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:20.953156', 'step': 8351, 'epoch': 2} {'type': 'loss', 'content': 0.09357578307390213, 'timestamp': '2025-09-10 02:44:20.959502', 'step': 8352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:21.013179', 'step': 8352, 'epoch': 2} {'type': 'loss', 'content': 0.12459250539541245, 'timestamp': '2025-09-10 02:44:21.021406', 'step': 8353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:21.077690', 'step': 8353, 'epoch': 2} {'type': 'loss', 'content': 0.14034926891326904, 'timestamp': '2025-09-10 02:44:21.079971', 'step': 8354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:21.137229', 'step': 8354, 'epoch': 2} {'type': 'loss', 'content': 0.10625479370355606, 'timestamp': '2025-09-10 02:44:21.139480', 'step': 8355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:21.194655', 'step': 8355, 'epoch': 2} {'type': 'loss', 'content': 0.14821135997772217, 'timestamp': '2025-09-10 02:44:21.200805', 'step': 8356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:21.254710', 'step': 8356, 'epoch': 2} {'type': 'loss', 'content': 0.13673891127109528, 'timestamp': '2025-09-10 02:44:21.256734', 'step': 8357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:21.311252', 'step': 8357, 'epoch': 2} {'type': 'loss', 'content': 0.10786605626344681, 'timestamp': '2025-09-10 02:44:21.313454', 'step': 8358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:21.366983', 'step': 8358, 'epoch': 2} {'type': 'loss', 'content': 0.13209159672260284, 'timestamp': '2025-09-10 02:44:21.368956', 'step': 8359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:21.423099', 'step': 8359, 'epoch': 2} {'type': 'loss', 'content': 0.2819984555244446, 'timestamp': '2025-09-10 02:44:21.429174', 'step': 8360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:21.482154', 'step': 8360, 'epoch': 2} {'type': 'loss', 'content': 0.08463659137487411, 'timestamp': '2025-09-10 02:44:21.484114', 'step': 8361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:21.538508', 'step': 8361, 'epoch': 2} {'type': 'loss', 'content': 0.11847420781850815, 'timestamp': '2025-09-10 02:44:21.540675', 'step': 8362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:21.594528', 'step': 8362, 'epoch': 2} {'type': 'loss', 'content': 0.10507266223430634, 'timestamp': '2025-09-10 02:44:21.596739', 'step': 8363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:21.650602', 'step': 8363, 'epoch': 2} {'type': 'loss', 'content': 0.11037199944257736, 'timestamp': '2025-09-10 02:44:21.656640', 'step': 8364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:21.709240', 'step': 8364, 'epoch': 2} {'type': 'loss', 'content': 0.15749908983707428, 'timestamp': '2025-09-10 02:44:21.711520', 'step': 8365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:21.764513', 'step': 8365, 'epoch': 2} {'type': 'loss', 'content': 0.1590208262205124, 'timestamp': '2025-09-10 02:44:21.766568', 'step': 8366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:21.820131', 'step': 8366, 'epoch': 2} {'type': 'loss', 'content': 0.11953417956829071, 'timestamp': '2025-09-10 02:44:21.822360', 'step': 8367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:21.875750', 'step': 8367, 'epoch': 2} {'type': 'loss', 'content': 0.11411245167255402, 'timestamp': '2025-09-10 02:44:21.881755', 'step': 8368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:21.937937', 'step': 8368, 'epoch': 2} {'type': 'loss', 'content': 0.18188858032226562, 'timestamp': '2025-09-10 02:44:21.940406', 'step': 8369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:21.995007', 'step': 8369, 'epoch': 2} {'type': 'loss', 'content': 0.12877997756004333, 'timestamp': '2025-09-10 02:44:21.997098', 'step': 8370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:22.051852', 'step': 8370, 'epoch': 2} {'type': 'loss', 'content': 0.15033668279647827, 'timestamp': '2025-09-10 02:44:22.054253', 'step': 8371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:22.108457', 'step': 8371, 'epoch': 2} {'type': 'loss', 'content': 0.16887825727462769, 'timestamp': '2025-09-10 02:44:22.114652', 'step': 8372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:22.167228', 'step': 8372, 'epoch': 2} {'type': 'loss', 'content': 0.11362728476524353, 'timestamp': '2025-09-10 02:44:22.169159', 'step': 8373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:22.222688', 'step': 8373, 'epoch': 2} {'type': 'loss', 'content': 0.08196980506181717, 'timestamp': '2025-09-10 02:44:22.224891', 'step': 8374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:22.278852', 'step': 8374, 'epoch': 2} {'type': 'loss', 'content': 0.2302205115556717, 'timestamp': '2025-09-10 02:44:22.280838', 'step': 8375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:22.335092', 'step': 8375, 'epoch': 2} {'type': 'loss', 'content': 0.13760188221931458, 'timestamp': '2025-09-10 02:44:22.341218', 'step': 8376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:22.394867', 'step': 8376, 'epoch': 2} {'type': 'loss', 'content': 0.22832037508487701, 'timestamp': '2025-09-10 02:44:22.396866', 'step': 8377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:22.451136', 'step': 8377, 'epoch': 2} {'type': 'loss', 'content': 0.16774491965770721, 'timestamp': '2025-09-10 02:44:22.453286', 'step': 8378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:22.507662', 'step': 8378, 'epoch': 2} {'type': 'loss', 'content': 0.21832714974880219, 'timestamp': '2025-09-10 02:44:22.509816', 'step': 8379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:22.563414', 'step': 8379, 'epoch': 2} {'type': 'loss', 'content': 0.1465366929769516, 'timestamp': '2025-09-10 02:44:22.569525', 'step': 8380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:22.622951', 'step': 8380, 'epoch': 2} {'type': 'loss', 'content': 0.20680691301822662, 'timestamp': '2025-09-10 02:44:22.625191', 'step': 8381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:22.678634', 'step': 8381, 'epoch': 2} {'type': 'loss', 'content': 0.12278442084789276, 'timestamp': '2025-09-10 02:44:22.680959', 'step': 8382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:22.734717', 'step': 8382, 'epoch': 2} {'type': 'loss', 'content': 0.13994161784648895, 'timestamp': '2025-09-10 02:44:22.736930', 'step': 8383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:22.790397', 'step': 8383, 'epoch': 2} {'type': 'loss', 'content': 0.20812591910362244, 'timestamp': '2025-09-10 02:44:22.796568', 'step': 8384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:22.855736', 'step': 8384, 'epoch': 2} {'type': 'loss', 'content': 0.09711538255214691, 'timestamp': '2025-09-10 02:44:22.858164', 'step': 8385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:22.912851', 'step': 8385, 'epoch': 2} {'type': 'loss', 'content': 0.2183978110551834, 'timestamp': '2025-09-10 02:44:22.915183', 'step': 8386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:22.969186', 'step': 8386, 'epoch': 2} {'type': 'loss', 'content': 0.19751852750778198, 'timestamp': '2025-09-10 02:44:22.971516', 'step': 8387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:23.025809', 'step': 8387, 'epoch': 2} {'type': 'loss', 'content': 0.13935215771198273, 'timestamp': '2025-09-10 02:44:23.031979', 'step': 8388, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:44:35.537099', 'step': 8388, 'epoch': 2} {'type': 'pplx', 'content': 10649.718863646747, 'timestamp': '2025-09-10 02:44:35.540227', 'step': 8388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:35.597765', 'step': 8388, 'epoch': 2} {'type': 'loss', 'content': 0.12021031230688095, 'timestamp': '2025-09-10 02:44:35.600032', 'step': 8389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:35.655895', 'step': 8389, 'epoch': 2} {'type': 'loss', 'content': 0.10526697337627411, 'timestamp': '2025-09-10 02:44:35.658094', 'step': 8390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:35.716448', 'step': 8390, 'epoch': 2} {'type': 'loss', 'content': 0.10601804405450821, 'timestamp': '2025-09-10 02:44:35.718500', 'step': 8391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:35.775272', 'step': 8391, 'epoch': 2} {'type': 'loss', 'content': 0.22795289754867554, 'timestamp': '2025-09-10 02:44:35.781722', 'step': 8392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:35.837025', 'step': 8392, 'epoch': 2} {'type': 'loss', 'content': 0.08241887390613556, 'timestamp': '2025-09-10 02:44:35.839416', 'step': 8393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:35.903346', 'step': 8393, 'epoch': 2} {'type': 'loss', 'content': 0.11037683486938477, 'timestamp': '2025-09-10 02:44:35.906582', 'step': 8394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:35.964783', 'step': 8394, 'epoch': 2} {'type': 'loss', 'content': 0.16939863562583923, 'timestamp': '2025-09-10 02:44:35.966968', 'step': 8395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:36.022231', 'step': 8395, 'epoch': 2} {'type': 'loss', 'content': 0.0854419618844986, 'timestamp': '2025-09-10 02:44:36.028755', 'step': 8396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:36.083954', 'step': 8396, 'epoch': 2} {'type': 'loss', 'content': 0.16754606366157532, 'timestamp': '2025-09-10 02:44:36.086322', 'step': 8397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:36.140407', 'step': 8397, 'epoch': 2} {'type': 'loss', 'content': 0.18975116312503815, 'timestamp': '2025-09-10 02:44:36.142593', 'step': 8398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:36.197627', 'step': 8398, 'epoch': 2} {'type': 'loss', 'content': 0.1732795536518097, 'timestamp': '2025-09-10 02:44:36.199835', 'step': 8399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:36.254704', 'step': 8399, 'epoch': 2} {'type': 'loss', 'content': 0.18219251930713654, 'timestamp': '2025-09-10 02:44:36.262286', 'step': 8400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:36.317648', 'step': 8400, 'epoch': 2} {'type': 'loss', 'content': 0.14508089423179626, 'timestamp': '2025-09-10 02:44:36.319622', 'step': 8401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:36.374987', 'step': 8401, 'epoch': 2} {'type': 'loss', 'content': 0.15240782499313354, 'timestamp': '2025-09-10 02:44:36.377201', 'step': 8402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:36.433330', 'step': 8402, 'epoch': 2} {'type': 'loss', 'content': 0.15992990136146545, 'timestamp': '2025-09-10 02:44:36.435478', 'step': 8403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:36.494868', 'step': 8403, 'epoch': 2} {'type': 'loss', 'content': 0.08244559913873672, 'timestamp': '2025-09-10 02:44:36.501122', 'step': 8404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:36.554001', 'step': 8404, 'epoch': 2} {'type': 'loss', 'content': 0.1200614720582962, 'timestamp': '2025-09-10 02:44:36.556257', 'step': 8405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:36.610187', 'step': 8405, 'epoch': 2} {'type': 'loss', 'content': 0.09532144665718079, 'timestamp': '2025-09-10 02:44:36.612450', 'step': 8406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:36.666495', 'step': 8406, 'epoch': 2} {'type': 'loss', 'content': 0.14992360770702362, 'timestamp': '2025-09-10 02:44:36.668867', 'step': 8407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:36.722677', 'step': 8407, 'epoch': 2} {'type': 'loss', 'content': 0.16155056655406952, 'timestamp': '2025-09-10 02:44:36.728804', 'step': 8408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:36.783781', 'step': 8408, 'epoch': 2} {'type': 'loss', 'content': 0.16168594360351562, 'timestamp': '2025-09-10 02:44:36.785954', 'step': 8409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:36.839887', 'step': 8409, 'epoch': 2} {'type': 'loss', 'content': 0.12440428882837296, 'timestamp': '2025-09-10 02:44:36.842132', 'step': 8410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:36.895710', 'step': 8410, 'epoch': 2} {'type': 'loss', 'content': 0.17483702301979065, 'timestamp': '2025-09-10 02:44:36.897894', 'step': 8411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:36.952860', 'step': 8411, 'epoch': 2} {'type': 'loss', 'content': 0.15138697624206543, 'timestamp': '2025-09-10 02:44:36.959233', 'step': 8412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:37.012969', 'step': 8412, 'epoch': 2} {'type': 'loss', 'content': 0.09844563901424408, 'timestamp': '2025-09-10 02:44:37.016394', 'step': 8413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:37.070787', 'step': 8413, 'epoch': 2} {'type': 'loss', 'content': 0.06217193230986595, 'timestamp': '2025-09-10 02:44:37.073384', 'step': 8414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:37.128518', 'step': 8414, 'epoch': 2} {'type': 'loss', 'content': 0.10292699933052063, 'timestamp': '2025-09-10 02:44:37.130757', 'step': 8415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:37.185051', 'step': 8415, 'epoch': 2} {'type': 'loss', 'content': 0.10308351367712021, 'timestamp': '2025-09-10 02:44:37.191688', 'step': 8416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:37.245204', 'step': 8416, 'epoch': 2} {'type': 'loss', 'content': 0.16655325889587402, 'timestamp': '2025-09-10 02:44:37.247598', 'step': 8417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:37.301559', 'step': 8417, 'epoch': 2} {'type': 'loss', 'content': 0.13906922936439514, 'timestamp': '2025-09-10 02:44:37.303953', 'step': 8418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:37.358399', 'step': 8418, 'epoch': 2} {'type': 'loss', 'content': 0.213521808385849, 'timestamp': '2025-09-10 02:44:37.360843', 'step': 8419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:37.414371', 'step': 8419, 'epoch': 2} {'type': 'loss', 'content': 0.1450764685869217, 'timestamp': '2025-09-10 02:44:37.420764', 'step': 8420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:37.474941', 'step': 8420, 'epoch': 2} {'type': 'loss', 'content': 0.11783120781183243, 'timestamp': '2025-09-10 02:44:37.477304', 'step': 8421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:37.532274', 'step': 8421, 'epoch': 2} {'type': 'loss', 'content': 0.10799701511859894, 'timestamp': '2025-09-10 02:44:37.534911', 'step': 8422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:37.590000', 'step': 8422, 'epoch': 2} {'type': 'loss', 'content': 0.14419136941432953, 'timestamp': '2025-09-10 02:44:37.592686', 'step': 8423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:37.646827', 'step': 8423, 'epoch': 2} {'type': 'loss', 'content': 0.13001197576522827, 'timestamp': '2025-09-10 02:44:37.653417', 'step': 8424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:37.706136', 'step': 8424, 'epoch': 2} {'type': 'loss', 'content': 0.13706371188163757, 'timestamp': '2025-09-10 02:44:37.708657', 'step': 8425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:37.762755', 'step': 8425, 'epoch': 2} {'type': 'loss', 'content': 0.11157207190990448, 'timestamp': '2025-09-10 02:44:37.765285', 'step': 8426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:37.819295', 'step': 8426, 'epoch': 2} {'type': 'loss', 'content': 0.10969562828540802, 'timestamp': '2025-09-10 02:44:37.821723', 'step': 8427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:37.876471', 'step': 8427, 'epoch': 2} {'type': 'loss', 'content': 0.15451404452323914, 'timestamp': '2025-09-10 02:44:37.882967', 'step': 8428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:37.955965', 'step': 8428, 'epoch': 2} {'type': 'loss', 'content': 0.10097730904817581, 'timestamp': '2025-09-10 02:44:37.958185', 'step': 8429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:38.046765', 'step': 8429, 'epoch': 2} {'type': 'loss', 'content': 0.1615670919418335, 'timestamp': '2025-09-10 02:44:38.049289', 'step': 8430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:38.121911', 'step': 8430, 'epoch': 2} {'type': 'loss', 'content': 0.12151052057743073, 'timestamp': '2025-09-10 02:44:38.124232', 'step': 8431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:38.180234', 'step': 8431, 'epoch': 2} {'type': 'loss', 'content': 0.20031823217868805, 'timestamp': '2025-09-10 02:44:38.186770', 'step': 8432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:38.240720', 'step': 8432, 'epoch': 2} {'type': 'loss', 'content': 0.1407168209552765, 'timestamp': '2025-09-10 02:44:38.243022', 'step': 8433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:38.296869', 'step': 8433, 'epoch': 2} {'type': 'loss', 'content': 0.08902435004711151, 'timestamp': '2025-09-10 02:44:38.299084', 'step': 8434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:38.352608', 'step': 8434, 'epoch': 2} {'type': 'loss', 'content': 0.18411123752593994, 'timestamp': '2025-09-10 02:44:38.354941', 'step': 8435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:38.408400', 'step': 8435, 'epoch': 2} {'type': 'loss', 'content': 0.1710161715745926, 'timestamp': '2025-09-10 02:44:38.414947', 'step': 8436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:38.469411', 'step': 8436, 'epoch': 2} {'type': 'loss', 'content': 0.13840104639530182, 'timestamp': '2025-09-10 02:44:38.471775', 'step': 8437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:38.526546', 'step': 8437, 'epoch': 2} {'type': 'loss', 'content': 0.1563282459974289, 'timestamp': '2025-09-10 02:44:38.528899', 'step': 8438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:38.584330', 'step': 8438, 'epoch': 2} {'type': 'loss', 'content': 0.16475379467010498, 'timestamp': '2025-09-10 02:44:38.586585', 'step': 8439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:38.640906', 'step': 8439, 'epoch': 2} {'type': 'loss', 'content': 0.13061566650867462, 'timestamp': '2025-09-10 02:44:38.647349', 'step': 8440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:38.701242', 'step': 8440, 'epoch': 2} {'type': 'loss', 'content': 0.09312491118907928, 'timestamp': '2025-09-10 02:44:38.703577', 'step': 8441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:38.762956', 'step': 8441, 'epoch': 2} {'type': 'loss', 'content': 0.11696937680244446, 'timestamp': '2025-09-10 02:44:38.765289', 'step': 8442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:38.821952', 'step': 8442, 'epoch': 2} {'type': 'loss', 'content': 0.07691756635904312, 'timestamp': '2025-09-10 02:44:38.824258', 'step': 8443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:38.877482', 'step': 8443, 'epoch': 2} {'type': 'loss', 'content': 0.21113529801368713, 'timestamp': '2025-09-10 02:44:38.883767', 'step': 8444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:38.936567', 'step': 8444, 'epoch': 2} {'type': 'loss', 'content': 0.1299632340669632, 'timestamp': '2025-09-10 02:44:38.938923', 'step': 8445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:38.992890', 'step': 8445, 'epoch': 2} {'type': 'loss', 'content': 0.06677071750164032, 'timestamp': '2025-09-10 02:44:38.995324', 'step': 8446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:39.049019', 'step': 8446, 'epoch': 2} {'type': 'loss', 'content': 0.10974342375993729, 'timestamp': '2025-09-10 02:44:39.051353', 'step': 8447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:39.106316', 'step': 8447, 'epoch': 2} {'type': 'loss', 'content': 0.16358762979507446, 'timestamp': '2025-09-10 02:44:39.112811', 'step': 8448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:39.166512', 'step': 8448, 'epoch': 2} {'type': 'loss', 'content': 0.20702087879180908, 'timestamp': '2025-09-10 02:44:39.168940', 'step': 8449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:39.223318', 'step': 8449, 'epoch': 2} {'type': 'loss', 'content': 0.10974198579788208, 'timestamp': '2025-09-10 02:44:39.225545', 'step': 8450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:39.279804', 'step': 8450, 'epoch': 2} {'type': 'loss', 'content': 0.0746137723326683, 'timestamp': '2025-09-10 02:44:39.282320', 'step': 8451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:39.336310', 'step': 8451, 'epoch': 2} {'type': 'loss', 'content': 0.17438939213752747, 'timestamp': '2025-09-10 02:44:39.342641', 'step': 8452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:39.398089', 'step': 8452, 'epoch': 2} {'type': 'loss', 'content': 0.11732795089483261, 'timestamp': '2025-09-10 02:44:39.400461', 'step': 8453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:39.454416', 'step': 8453, 'epoch': 2} {'type': 'loss', 'content': 0.17420797049999237, 'timestamp': '2025-09-10 02:44:39.456835', 'step': 8454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:39.510898', 'step': 8454, 'epoch': 2} {'type': 'loss', 'content': 0.13515587151050568, 'timestamp': '2025-09-10 02:44:39.513220', 'step': 8455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:39.568462', 'step': 8455, 'epoch': 2} {'type': 'loss', 'content': 0.14447127282619476, 'timestamp': '2025-09-10 02:44:39.574551', 'step': 8456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:39.628304', 'step': 8456, 'epoch': 2} {'type': 'loss', 'content': 0.13908827304840088, 'timestamp': '2025-09-10 02:44:39.630661', 'step': 8457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:39.685267', 'step': 8457, 'epoch': 2} {'type': 'loss', 'content': 0.16194899380207062, 'timestamp': '2025-09-10 02:44:39.687564', 'step': 8458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:39.741229', 'step': 8458, 'epoch': 2} {'type': 'loss', 'content': 0.1473366618156433, 'timestamp': '2025-09-10 02:44:39.743618', 'step': 8459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:39.796555', 'step': 8459, 'epoch': 2} {'type': 'loss', 'content': 0.11917378753423691, 'timestamp': '2025-09-10 02:44:39.802816', 'step': 8460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:39.855506', 'step': 8460, 'epoch': 2} {'type': 'loss', 'content': 0.12830127775669098, 'timestamp': '2025-09-10 02:44:39.857656', 'step': 8461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:39.910121', 'step': 8461, 'epoch': 2} {'type': 'loss', 'content': 0.17119893431663513, 'timestamp': '2025-09-10 02:44:39.912426', 'step': 8462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:39.966286', 'step': 8462, 'epoch': 2} {'type': 'loss', 'content': 0.23029258847236633, 'timestamp': '2025-09-10 02:44:39.968731', 'step': 8463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:40.021533', 'step': 8463, 'epoch': 2} {'type': 'loss', 'content': 0.09348060190677643, 'timestamp': '2025-09-10 02:44:40.029078', 'step': 8464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:40.082498', 'step': 8464, 'epoch': 2} {'type': 'loss', 'content': 0.13188107311725616, 'timestamp': '2025-09-10 02:44:40.084974', 'step': 8465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:40.140098', 'step': 8465, 'epoch': 2} {'type': 'loss', 'content': 0.13625505566596985, 'timestamp': '2025-09-10 02:44:40.142603', 'step': 8466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:40.197003', 'step': 8466, 'epoch': 2} {'type': 'loss', 'content': 0.17699727416038513, 'timestamp': '2025-09-10 02:44:40.199329', 'step': 8467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:40.265053', 'step': 8467, 'epoch': 2} {'type': 'loss', 'content': 0.103376105427742, 'timestamp': '2025-09-10 02:44:40.271986', 'step': 8468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:40.330153', 'step': 8468, 'epoch': 2} {'type': 'loss', 'content': 0.14989282190799713, 'timestamp': '2025-09-10 02:44:40.332422', 'step': 8469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:40.386021', 'step': 8469, 'epoch': 2} {'type': 'loss', 'content': 0.08409353345632553, 'timestamp': '2025-09-10 02:44:40.388338', 'step': 8470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:40.443787', 'step': 8470, 'epoch': 2} {'type': 'loss', 'content': 0.21462780237197876, 'timestamp': '2025-09-10 02:44:40.446015', 'step': 8471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:40.500873', 'step': 8471, 'epoch': 2} {'type': 'loss', 'content': 0.15504923462867737, 'timestamp': '2025-09-10 02:44:40.507068', 'step': 8472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:40.561293', 'step': 8472, 'epoch': 2} {'type': 'loss', 'content': 0.18064288794994354, 'timestamp': '2025-09-10 02:44:40.563683', 'step': 8473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:40.616419', 'step': 8473, 'epoch': 2} {'type': 'loss', 'content': 0.1156705692410469, 'timestamp': '2025-09-10 02:44:40.618758', 'step': 8474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:40.671517', 'step': 8474, 'epoch': 2} {'type': 'loss', 'content': 0.07459259778261185, 'timestamp': '2025-09-10 02:44:40.673569', 'step': 8475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:40.726131', 'step': 8475, 'epoch': 2} {'type': 'loss', 'content': 0.21105016767978668, 'timestamp': '2025-09-10 02:44:40.732059', 'step': 8476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:40.783727', 'step': 8476, 'epoch': 2} {'type': 'loss', 'content': 0.08665569871664047, 'timestamp': '2025-09-10 02:44:40.785935', 'step': 8477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:40.838735', 'step': 8477, 'epoch': 2} {'type': 'loss', 'content': 0.09486746042966843, 'timestamp': '2025-09-10 02:44:40.841091', 'step': 8478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:40.898949', 'step': 8478, 'epoch': 2} {'type': 'loss', 'content': 0.08496784418821335, 'timestamp': '2025-09-10 02:44:40.901126', 'step': 8479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:40.957294', 'step': 8479, 'epoch': 2} {'type': 'loss', 'content': 0.1585964411497116, 'timestamp': '2025-09-10 02:44:40.963389', 'step': 8480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:41.015942', 'step': 8480, 'epoch': 2} {'type': 'loss', 'content': 0.1253129541873932, 'timestamp': '2025-09-10 02:44:41.018339', 'step': 8481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:41.070566', 'step': 8481, 'epoch': 2} {'type': 'loss', 'content': 0.23331424593925476, 'timestamp': '2025-09-10 02:44:41.072805', 'step': 8482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:41.125520', 'step': 8482, 'epoch': 2} {'type': 'loss', 'content': 0.15835390985012054, 'timestamp': '2025-09-10 02:44:41.127881', 'step': 8483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:41.182471', 'step': 8483, 'epoch': 2} {'type': 'loss', 'content': 0.10576631873846054, 'timestamp': '2025-09-10 02:44:41.188492', 'step': 8484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:41.241351', 'step': 8484, 'epoch': 2} {'type': 'loss', 'content': 0.09670007973909378, 'timestamp': '2025-09-10 02:44:41.243707', 'step': 8485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:41.298148', 'step': 8485, 'epoch': 2} {'type': 'loss', 'content': 0.1614648550748825, 'timestamp': '2025-09-10 02:44:41.300525', 'step': 8486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:41.353532', 'step': 8486, 'epoch': 2} {'type': 'loss', 'content': 0.10021356493234634, 'timestamp': '2025-09-10 02:44:41.355925', 'step': 8487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:41.409706', 'step': 8487, 'epoch': 2} {'type': 'loss', 'content': 0.23352254927158356, 'timestamp': '2025-09-10 02:44:41.415831', 'step': 8488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:41.468728', 'step': 8488, 'epoch': 2} {'type': 'loss', 'content': 0.18272282183170319, 'timestamp': '2025-09-10 02:44:41.471049', 'step': 8489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:41.524325', 'step': 8489, 'epoch': 2} {'type': 'loss', 'content': 0.10438689589500427, 'timestamp': '2025-09-10 02:44:41.526560', 'step': 8490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:41.579362', 'step': 8490, 'epoch': 2} {'type': 'loss', 'content': 0.2672545611858368, 'timestamp': '2025-09-10 02:44:41.581698', 'step': 8491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:41.634489', 'step': 8491, 'epoch': 2} {'type': 'loss', 'content': 0.11191121488809586, 'timestamp': '2025-09-10 02:44:41.640488', 'step': 8492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:41.693945', 'step': 8492, 'epoch': 2} {'type': 'loss', 'content': 0.2383033186197281, 'timestamp': '2025-09-10 02:44:41.696252', 'step': 8493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:41.749504', 'step': 8493, 'epoch': 2} {'type': 'loss', 'content': 0.12846870720386505, 'timestamp': '2025-09-10 02:44:41.752036', 'step': 8494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:41.804067', 'step': 8494, 'epoch': 2} {'type': 'loss', 'content': 0.25723350048065186, 'timestamp': '2025-09-10 02:44:41.806570', 'step': 8495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:41.859183', 'step': 8495, 'epoch': 2} {'type': 'loss', 'content': 0.17716938257217407, 'timestamp': '2025-09-10 02:44:41.865202', 'step': 8496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:41.917836', 'step': 8496, 'epoch': 2} {'type': 'loss', 'content': 0.15148432552814484, 'timestamp': '2025-09-10 02:44:41.920309', 'step': 8497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:41.973618', 'step': 8497, 'epoch': 2} {'type': 'loss', 'content': 0.08141854405403137, 'timestamp': '2025-09-10 02:44:41.975927', 'step': 8498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:42.029350', 'step': 8498, 'epoch': 2} {'type': 'loss', 'content': 0.1127614974975586, 'timestamp': '2025-09-10 02:44:42.031996', 'step': 8499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:42.085288', 'step': 8499, 'epoch': 2} {'type': 'loss', 'content': 0.14894495904445648, 'timestamp': '2025-09-10 02:44:42.091625', 'step': 8500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 8500', 'timestamp': '2025-09-10 02:44:42.474572', 'step': 8500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:42.530168', 'step': 8500, 'epoch': 2} {'type': 'loss', 'content': 0.22875359654426575, 'timestamp': '2025-09-10 02:44:42.532516', 'step': 8501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:42.586350', 'step': 8501, 'epoch': 2} {'type': 'loss', 'content': 0.2222462296485901, 'timestamp': '2025-09-10 02:44:42.588672', 'step': 8502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:42.642482', 'step': 8502, 'epoch': 2} {'type': 'loss', 'content': 0.11880555748939514, 'timestamp': '2025-09-10 02:44:42.644850', 'step': 8503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:42.698755', 'step': 8503, 'epoch': 2} {'type': 'loss', 'content': 0.18358223140239716, 'timestamp': '2025-09-10 02:44:42.705244', 'step': 8504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:42.757466', 'step': 8504, 'epoch': 2} {'type': 'loss', 'content': 0.15696632862091064, 'timestamp': '2025-09-10 02:44:42.759724', 'step': 8505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:42.812141', 'step': 8505, 'epoch': 2} {'type': 'loss', 'content': 0.07153386622667313, 'timestamp': '2025-09-10 02:44:42.814512', 'step': 8506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:42.868501', 'step': 8506, 'epoch': 2} {'type': 'loss', 'content': 0.16739730536937714, 'timestamp': '2025-09-10 02:44:42.870880', 'step': 8507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:42.925120', 'step': 8507, 'epoch': 2} {'type': 'loss', 'content': 0.14146633446216583, 'timestamp': '2025-09-10 02:44:42.931591', 'step': 8508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:42.984164', 'step': 8508, 'epoch': 2} {'type': 'loss', 'content': 0.13138115406036377, 'timestamp': '2025-09-10 02:44:42.986716', 'step': 8509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:43.040015', 'step': 8509, 'epoch': 2} {'type': 'loss', 'content': 0.08824404329061508, 'timestamp': '2025-09-10 02:44:43.042448', 'step': 8510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:43.095267', 'step': 8510, 'epoch': 2} {'type': 'loss', 'content': 0.09662314504384995, 'timestamp': '2025-09-10 02:44:43.097681', 'step': 8511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:43.152798', 'step': 8511, 'epoch': 2} {'type': 'loss', 'content': 0.10889647156000137, 'timestamp': '2025-09-10 02:44:43.161327', 'step': 8512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:43.216374', 'step': 8512, 'epoch': 2} {'type': 'loss', 'content': 0.11768882721662521, 'timestamp': '2025-09-10 02:44:43.222912', 'step': 8513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:43.283542', 'step': 8513, 'epoch': 2} {'type': 'loss', 'content': 0.11045531183481216, 'timestamp': '2025-09-10 02:44:43.285912', 'step': 8514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:43.340565', 'step': 8514, 'epoch': 2} {'type': 'loss', 'content': 0.10595962405204773, 'timestamp': '2025-09-10 02:44:43.345087', 'step': 8515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:43.412557', 'step': 8515, 'epoch': 2} {'type': 'loss', 'content': 0.21365642547607422, 'timestamp': '2025-09-10 02:44:43.422583', 'step': 8516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:43.479767', 'step': 8516, 'epoch': 2} {'type': 'loss', 'content': 0.23994696140289307, 'timestamp': '2025-09-10 02:44:43.482146', 'step': 8517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:43.535487', 'step': 8517, 'epoch': 2} {'type': 'loss', 'content': 0.0940731018781662, 'timestamp': '2025-09-10 02:44:43.537878', 'step': 8518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:43.593820', 'step': 8518, 'epoch': 2} {'type': 'loss', 'content': 0.11358271539211273, 'timestamp': '2025-09-10 02:44:43.596231', 'step': 8519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:43.649778', 'step': 8519, 'epoch': 2} {'type': 'loss', 'content': 0.19389884173870087, 'timestamp': '2025-09-10 02:44:43.655991', 'step': 8520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:43.708907', 'step': 8520, 'epoch': 2} {'type': 'loss', 'content': 0.2831588685512543, 'timestamp': '2025-09-10 02:44:43.711453', 'step': 8521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:43.764385', 'step': 8521, 'epoch': 2} {'type': 'loss', 'content': 0.0956992506980896, 'timestamp': '2025-09-10 02:44:43.769138', 'step': 8522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:43.823691', 'step': 8522, 'epoch': 2} {'type': 'loss', 'content': 0.14673466980457306, 'timestamp': '2025-09-10 02:44:43.826231', 'step': 8523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:43.884157', 'step': 8523, 'epoch': 2} {'type': 'loss', 'content': 0.15039730072021484, 'timestamp': '2025-09-10 02:44:43.890401', 'step': 8524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:43.944622', 'step': 8524, 'epoch': 2} {'type': 'loss', 'content': 0.1499517410993576, 'timestamp': '2025-09-10 02:44:43.947187', 'step': 8525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:44.000426', 'step': 8525, 'epoch': 2} {'type': 'loss', 'content': 0.14794129133224487, 'timestamp': '2025-09-10 02:44:44.005918', 'step': 8526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:44.059515', 'step': 8526, 'epoch': 2} {'type': 'loss', 'content': 0.13663391768932343, 'timestamp': '2025-09-10 02:44:44.069984', 'step': 8527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:44.128324', 'step': 8527, 'epoch': 2} {'type': 'loss', 'content': 0.1662604957818985, 'timestamp': '2025-09-10 02:44:44.134436', 'step': 8528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:44.186391', 'step': 8528, 'epoch': 2} {'type': 'loss', 'content': 0.12463347613811493, 'timestamp': '2025-09-10 02:44:44.188790', 'step': 8529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:44.246104', 'step': 8529, 'epoch': 2} {'type': 'loss', 'content': 0.13892851769924164, 'timestamp': '2025-09-10 02:44:44.254658', 'step': 8530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:44.310631', 'step': 8530, 'epoch': 2} {'type': 'loss', 'content': 0.1521640568971634, 'timestamp': '2025-09-10 02:44:44.313066', 'step': 8531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:44.369702', 'step': 8531, 'epoch': 2} {'type': 'loss', 'content': 0.149871364235878, 'timestamp': '2025-09-10 02:44:44.375757', 'step': 8532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:44.431299', 'step': 8532, 'epoch': 2} {'type': 'loss', 'content': 0.15987589955329895, 'timestamp': '2025-09-10 02:44:44.433762', 'step': 8533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:44.499980', 'step': 8533, 'epoch': 2} {'type': 'loss', 'content': 0.15444207191467285, 'timestamp': '2025-09-10 02:44:44.506582', 'step': 8534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:44.561009', 'step': 8534, 'epoch': 2} {'type': 'loss', 'content': 0.14406831562519073, 'timestamp': '2025-09-10 02:44:44.568094', 'step': 8535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:44.626419', 'step': 8535, 'epoch': 2} {'type': 'loss', 'content': 0.10809436440467834, 'timestamp': '2025-09-10 02:44:44.632651', 'step': 8536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:44.692890', 'step': 8536, 'epoch': 2} {'type': 'loss', 'content': 0.1784399449825287, 'timestamp': '2025-09-10 02:44:44.697465', 'step': 8537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:44.753558', 'step': 8537, 'epoch': 2} {'type': 'loss', 'content': 0.11035376787185669, 'timestamp': '2025-09-10 02:44:44.755865', 'step': 8538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:44.817222', 'step': 8538, 'epoch': 2} {'type': 'loss', 'content': 0.0706065222620964, 'timestamp': '2025-09-10 02:44:44.819677', 'step': 8539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:44.879774', 'step': 8539, 'epoch': 2} {'type': 'loss', 'content': 0.23521052300930023, 'timestamp': '2025-09-10 02:44:44.885632', 'step': 8540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:44.938454', 'step': 8540, 'epoch': 2} {'type': 'loss', 'content': 0.08647605031728745, 'timestamp': '2025-09-10 02:44:44.945801', 'step': 8541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:45.006965', 'step': 8541, 'epoch': 2} {'type': 'loss', 'content': 0.05220848321914673, 'timestamp': '2025-09-10 02:44:45.009591', 'step': 8542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:45.068524', 'step': 8542, 'epoch': 2} {'type': 'loss', 'content': 0.24473853409290314, 'timestamp': '2025-09-10 02:44:45.071083', 'step': 8543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:45.123845', 'step': 8543, 'epoch': 2} {'type': 'loss', 'content': 0.06764274835586548, 'timestamp': '2025-09-10 02:44:45.133980', 'step': 8544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:45.197897', 'step': 8544, 'epoch': 2} {'type': 'loss', 'content': 0.1596519947052002, 'timestamp': '2025-09-10 02:44:45.206306', 'step': 8545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:45.264644', 'step': 8545, 'epoch': 2} {'type': 'loss', 'content': 0.11259564012289047, 'timestamp': '2025-09-10 02:44:45.267320', 'step': 8546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:45.322987', 'step': 8546, 'epoch': 2} {'type': 'loss', 'content': 0.14577479660511017, 'timestamp': '2025-09-10 02:44:45.325416', 'step': 8547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:45.379527', 'step': 8547, 'epoch': 2} {'type': 'loss', 'content': 0.12957526743412018, 'timestamp': '2025-09-10 02:44:45.385646', 'step': 8548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:45.444913', 'step': 8548, 'epoch': 2} {'type': 'loss', 'content': 0.07600346207618713, 'timestamp': '2025-09-10 02:44:45.449720', 'step': 8549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:45.507302', 'step': 8549, 'epoch': 2} {'type': 'loss', 'content': 0.1343783289194107, 'timestamp': '2025-09-10 02:44:45.509744', 'step': 8550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:45.565867', 'step': 8550, 'epoch': 2} {'type': 'loss', 'content': 0.10795008391141891, 'timestamp': '2025-09-10 02:44:45.568504', 'step': 8551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:45.622748', 'step': 8551, 'epoch': 2} {'type': 'loss', 'content': 0.10118886828422546, 'timestamp': '2025-09-10 02:44:45.630028', 'step': 8552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:45.685505', 'step': 8552, 'epoch': 2} {'type': 'loss', 'content': 0.1460401862859726, 'timestamp': '2025-09-10 02:44:45.690034', 'step': 8553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:45.744298', 'step': 8553, 'epoch': 2} {'type': 'loss', 'content': 0.0948004275560379, 'timestamp': '2025-09-10 02:44:45.747004', 'step': 8554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:45.800562', 'step': 8554, 'epoch': 2} {'type': 'loss', 'content': 0.1735006421804428, 'timestamp': '2025-09-10 02:44:45.803089', 'step': 8555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:45.856446', 'step': 8555, 'epoch': 2} {'type': 'loss', 'content': 0.11741834878921509, 'timestamp': '2025-09-10 02:44:45.862592', 'step': 8556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:45.915627', 'step': 8556, 'epoch': 2} {'type': 'loss', 'content': 0.2012883573770523, 'timestamp': '2025-09-10 02:44:45.918131', 'step': 8557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:45.971817', 'step': 8557, 'epoch': 2} {'type': 'loss', 'content': 0.18091577291488647, 'timestamp': '2025-09-10 02:44:45.974580', 'step': 8558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:46.027656', 'step': 8558, 'epoch': 2} {'type': 'loss', 'content': 0.13790874183177948, 'timestamp': '2025-09-10 02:44:46.030455', 'step': 8559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:46.083498', 'step': 8559, 'epoch': 2} {'type': 'loss', 'content': 0.09697769582271576, 'timestamp': '2025-09-10 02:44:46.095783', 'step': 8560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:46.159504', 'step': 8560, 'epoch': 2} {'type': 'loss', 'content': 0.08976422250270844, 'timestamp': '2025-09-10 02:44:46.161986', 'step': 8561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:46.222509', 'step': 8561, 'epoch': 2} {'type': 'loss', 'content': 0.10188338160514832, 'timestamp': '2025-09-10 02:44:46.225183', 'step': 8562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:46.278648', 'step': 8562, 'epoch': 2} {'type': 'loss', 'content': 0.15937097370624542, 'timestamp': '2025-09-10 02:44:46.281192', 'step': 8563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:46.334667', 'step': 8563, 'epoch': 2} {'type': 'loss', 'content': 0.10432776063680649, 'timestamp': '2025-09-10 02:44:46.340875', 'step': 8564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:46.392791', 'step': 8564, 'epoch': 2} {'type': 'loss', 'content': 0.14501802623271942, 'timestamp': '2025-09-10 02:44:46.395238', 'step': 8565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:46.448748', 'step': 8565, 'epoch': 2} {'type': 'loss', 'content': 0.13492226600646973, 'timestamp': '2025-09-10 02:44:46.451415', 'step': 8566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:46.505855', 'step': 8566, 'epoch': 2} {'type': 'loss', 'content': 0.17150798439979553, 'timestamp': '2025-09-10 02:44:46.508795', 'step': 8567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:46.562145', 'step': 8567, 'epoch': 2} {'type': 'loss', 'content': 0.16224496066570282, 'timestamp': '2025-09-10 02:44:46.568388', 'step': 8568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:46.622701', 'step': 8568, 'epoch': 2} {'type': 'loss', 'content': 0.11807388067245483, 'timestamp': '2025-09-10 02:44:46.625207', 'step': 8569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:46.678547', 'step': 8569, 'epoch': 2} {'type': 'loss', 'content': 0.14279219508171082, 'timestamp': '2025-09-10 02:44:46.680958', 'step': 8570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:46.739882', 'step': 8570, 'epoch': 2} {'type': 'loss', 'content': 0.13612093031406403, 'timestamp': '2025-09-10 02:44:46.745211', 'step': 8571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:46.804242', 'step': 8571, 'epoch': 2} {'type': 'loss', 'content': 0.16586478054523468, 'timestamp': '2025-09-10 02:44:46.810551', 'step': 8572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:46.863689', 'step': 8572, 'epoch': 2} {'type': 'loss', 'content': 0.17487768828868866, 'timestamp': '2025-09-10 02:44:46.865906', 'step': 8573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:46.919637', 'step': 8573, 'epoch': 2} {'type': 'loss', 'content': 0.13852757215499878, 'timestamp': '2025-09-10 02:44:46.924486', 'step': 8574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:46.981393', 'step': 8574, 'epoch': 2} {'type': 'loss', 'content': 0.08309166133403778, 'timestamp': '2025-09-10 02:44:46.983832', 'step': 8575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:47.038390', 'step': 8575, 'epoch': 2} {'type': 'loss', 'content': 0.0965447723865509, 'timestamp': '2025-09-10 02:44:47.044321', 'step': 8576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:47.097256', 'step': 8576, 'epoch': 2} {'type': 'loss', 'content': 0.10030577331781387, 'timestamp': '2025-09-10 02:44:47.099709', 'step': 8577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:47.164094', 'step': 8577, 'epoch': 2} {'type': 'loss', 'content': 0.1968964785337448, 'timestamp': '2025-09-10 02:44:47.166378', 'step': 8578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:47.221126', 'step': 8578, 'epoch': 2} {'type': 'loss', 'content': 0.18162795901298523, 'timestamp': '2025-09-10 02:44:47.223560', 'step': 8579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:47.276790', 'step': 8579, 'epoch': 2} {'type': 'loss', 'content': 0.09709454327821732, 'timestamp': '2025-09-10 02:44:47.282859', 'step': 8580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:47.335665', 'step': 8580, 'epoch': 2} {'type': 'loss', 'content': 0.17814427614212036, 'timestamp': '2025-09-10 02:44:47.338379', 'step': 8581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:47.390547', 'step': 8581, 'epoch': 2} {'type': 'loss', 'content': 0.16737569868564606, 'timestamp': '2025-09-10 02:44:47.392968', 'step': 8582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:47.445643', 'step': 8582, 'epoch': 2} {'type': 'loss', 'content': 0.11926440894603729, 'timestamp': '2025-09-10 02:44:47.449999', 'step': 8583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:47.506710', 'step': 8583, 'epoch': 2} {'type': 'loss', 'content': 0.18083420395851135, 'timestamp': '2025-09-10 02:44:47.513896', 'step': 8584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:47.567148', 'step': 8584, 'epoch': 2} {'type': 'loss', 'content': 0.1083146333694458, 'timestamp': '2025-09-10 02:44:47.569602', 'step': 8585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:47.622699', 'step': 8585, 'epoch': 2} {'type': 'loss', 'content': 0.12959438562393188, 'timestamp': '2025-09-10 02:44:47.625030', 'step': 8586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:47.678289', 'step': 8586, 'epoch': 2} {'type': 'loss', 'content': 0.12090642005205154, 'timestamp': '2025-09-10 02:44:47.680605', 'step': 8587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:47.733784', 'step': 8587, 'epoch': 2} {'type': 'loss', 'content': 0.16324357688426971, 'timestamp': '2025-09-10 02:44:47.739706', 'step': 8588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:47.792427', 'step': 8588, 'epoch': 2} {'type': 'loss', 'content': 0.10173102468252182, 'timestamp': '2025-09-10 02:44:47.794753', 'step': 8589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:47.847993', 'step': 8589, 'epoch': 2} {'type': 'loss', 'content': 0.1518605351448059, 'timestamp': '2025-09-10 02:44:47.850463', 'step': 8590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:47.903600', 'step': 8590, 'epoch': 2} {'type': 'loss', 'content': 0.1989271640777588, 'timestamp': '2025-09-10 02:44:47.906931', 'step': 8591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:47.960218', 'step': 8591, 'epoch': 2} {'type': 'loss', 'content': 0.12904079258441925, 'timestamp': '2025-09-10 02:44:47.966507', 'step': 8592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:48.018872', 'step': 8592, 'epoch': 2} {'type': 'loss', 'content': 0.20064526796340942, 'timestamp': '2025-09-10 02:44:48.021314', 'step': 8593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:48.074824', 'step': 8593, 'epoch': 2} {'type': 'loss', 'content': 0.0994945541024208, 'timestamp': '2025-09-10 02:44:48.077361', 'step': 8594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:48.130940', 'step': 8594, 'epoch': 2} {'type': 'loss', 'content': 0.1853702962398529, 'timestamp': '2025-09-10 02:44:48.136301', 'step': 8595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:48.190523', 'step': 8595, 'epoch': 2} {'type': 'loss', 'content': 0.24883055686950684, 'timestamp': '2025-09-10 02:44:48.196727', 'step': 8596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:48.250199', 'step': 8596, 'epoch': 2} {'type': 'loss', 'content': 0.09322196990251541, 'timestamp': '2025-09-10 02:44:48.252605', 'step': 8597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:48.305893', 'step': 8597, 'epoch': 2} {'type': 'loss', 'content': 0.12963135540485382, 'timestamp': '2025-09-10 02:44:48.308337', 'step': 8598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:48.361210', 'step': 8598, 'epoch': 2} {'type': 'loss', 'content': 0.15350012481212616, 'timestamp': '2025-09-10 02:44:48.363629', 'step': 8599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:48.417412', 'step': 8599, 'epoch': 2} {'type': 'loss', 'content': 0.05072564631700516, 'timestamp': '2025-09-10 02:44:48.424568', 'step': 8600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:48.478784', 'step': 8600, 'epoch': 2} {'type': 'loss', 'content': 0.0971578061580658, 'timestamp': '2025-09-10 02:44:48.481214', 'step': 8601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:48.535507', 'step': 8601, 'epoch': 2} {'type': 'loss', 'content': 0.1720925271511078, 'timestamp': '2025-09-10 02:44:48.537654', 'step': 8602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:48.592941', 'step': 8602, 'epoch': 2} {'type': 'loss', 'content': 0.10035204142332077, 'timestamp': '2025-09-10 02:44:48.595431', 'step': 8603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:48.648949', 'step': 8603, 'epoch': 2} {'type': 'loss', 'content': 0.15434695780277252, 'timestamp': '2025-09-10 02:44:48.656931', 'step': 8604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:48.711314', 'step': 8604, 'epoch': 2} {'type': 'loss', 'content': 0.12724384665489197, 'timestamp': '2025-09-10 02:44:48.714401', 'step': 8605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:48.767938', 'step': 8605, 'epoch': 2} {'type': 'loss', 'content': 0.08246087282896042, 'timestamp': '2025-09-10 02:44:48.770407', 'step': 8606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:48.823391', 'step': 8606, 'epoch': 2} {'type': 'loss', 'content': 0.12524986267089844, 'timestamp': '2025-09-10 02:44:48.825690', 'step': 8607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:48.878588', 'step': 8607, 'epoch': 2} {'type': 'loss', 'content': 0.16900518536567688, 'timestamp': '2025-09-10 02:44:48.884713', 'step': 8608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:48.938098', 'step': 8608, 'epoch': 2} {'type': 'loss', 'content': 0.19516247510910034, 'timestamp': '2025-09-10 02:44:48.940840', 'step': 8609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:48.993487', 'step': 8609, 'epoch': 2} {'type': 'loss', 'content': 0.11619335412979126, 'timestamp': '2025-09-10 02:44:48.998915', 'step': 8610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:49.052343', 'step': 8610, 'epoch': 2} {'type': 'loss', 'content': 0.09849441796541214, 'timestamp': '2025-09-10 02:44:49.055702', 'step': 8611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:49.109443', 'step': 8611, 'epoch': 2} {'type': 'loss', 'content': 0.1038632020354271, 'timestamp': '2025-09-10 02:44:49.116354', 'step': 8612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:49.175073', 'step': 8612, 'epoch': 2} {'type': 'loss', 'content': 0.11705582588911057, 'timestamp': '2025-09-10 02:44:49.177488', 'step': 8613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:49.230532', 'step': 8613, 'epoch': 2} {'type': 'loss', 'content': 0.10955626517534256, 'timestamp': '2025-09-10 02:44:49.232698', 'step': 8614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:49.293211', 'step': 8614, 'epoch': 2} {'type': 'loss', 'content': 0.09785494953393936, 'timestamp': '2025-09-10 02:44:49.295768', 'step': 8615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:49.351619', 'step': 8615, 'epoch': 2} {'type': 'loss', 'content': 0.1753084510564804, 'timestamp': '2025-09-10 02:44:49.357426', 'step': 8616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:49.411004', 'step': 8616, 'epoch': 2} {'type': 'loss', 'content': 0.16571812331676483, 'timestamp': '2025-09-10 02:44:49.413296', 'step': 8617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:49.471543', 'step': 8617, 'epoch': 2} {'type': 'loss', 'content': 0.17989829182624817, 'timestamp': '2025-09-10 02:44:49.474360', 'step': 8618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:49.530977', 'step': 8618, 'epoch': 2} {'type': 'loss', 'content': 0.07698336988687515, 'timestamp': '2025-09-10 02:44:49.534413', 'step': 8619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:49.595516', 'step': 8619, 'epoch': 2} {'type': 'loss', 'content': 0.13300512731075287, 'timestamp': '2025-09-10 02:44:49.601423', 'step': 8620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:49.655271', 'step': 8620, 'epoch': 2} {'type': 'loss', 'content': 0.14929349720478058, 'timestamp': '2025-09-10 02:44:49.657810', 'step': 8621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:49.712807', 'step': 8621, 'epoch': 2} {'type': 'loss', 'content': 0.20508190989494324, 'timestamp': '2025-09-10 02:44:49.715646', 'step': 8622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:49.769530', 'step': 8622, 'epoch': 2} {'type': 'loss', 'content': 0.12746985256671906, 'timestamp': '2025-09-10 02:44:49.771920', 'step': 8623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:49.825914', 'step': 8623, 'epoch': 2} {'type': 'loss', 'content': 0.06010846048593521, 'timestamp': '2025-09-10 02:44:49.834599', 'step': 8624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:49.895166', 'step': 8624, 'epoch': 2} {'type': 'loss', 'content': 0.10711871087551117, 'timestamp': '2025-09-10 02:44:49.897840', 'step': 8625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:49.952130', 'step': 8625, 'epoch': 2} {'type': 'loss', 'content': 0.12030622363090515, 'timestamp': '2025-09-10 02:44:49.954607', 'step': 8626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:50.012699', 'step': 8626, 'epoch': 2} {'type': 'loss', 'content': 0.15378312766551971, 'timestamp': '2025-09-10 02:44:50.015196', 'step': 8627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:50.071562', 'step': 8627, 'epoch': 2} {'type': 'loss', 'content': 0.084833525121212, 'timestamp': '2025-09-10 02:44:50.077909', 'step': 8628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:50.130387', 'step': 8628, 'epoch': 2} {'type': 'loss', 'content': 0.09165625274181366, 'timestamp': '2025-09-10 02:44:50.134253', 'step': 8629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:50.188244', 'step': 8629, 'epoch': 2} {'type': 'loss', 'content': 0.16137468814849854, 'timestamp': '2025-09-10 02:44:50.190598', 'step': 8630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:50.245794', 'step': 8630, 'epoch': 2} {'type': 'loss', 'content': 0.12730801105499268, 'timestamp': '2025-09-10 02:44:50.248664', 'step': 8631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:50.304290', 'step': 8631, 'epoch': 2} {'type': 'loss', 'content': 0.17343059182167053, 'timestamp': '2025-09-10 02:44:50.311379', 'step': 8632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:50.365058', 'step': 8632, 'epoch': 2} {'type': 'loss', 'content': 0.10738305002450943, 'timestamp': '2025-09-10 02:44:50.370501', 'step': 8633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:50.428502', 'step': 8633, 'epoch': 2} {'type': 'loss', 'content': 0.16345717012882233, 'timestamp': '2025-09-10 02:44:50.430860', 'step': 8634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:50.488068', 'step': 8634, 'epoch': 2} {'type': 'loss', 'content': 0.1160733625292778, 'timestamp': '2025-09-10 02:44:50.494440', 'step': 8635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:50.560862', 'step': 8635, 'epoch': 2} {'type': 'loss', 'content': 0.1817982941865921, 'timestamp': '2025-09-10 02:44:50.567740', 'step': 8636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:50.631133', 'step': 8636, 'epoch': 2} {'type': 'loss', 'content': 0.10870590060949326, 'timestamp': '2025-09-10 02:44:50.633616', 'step': 8637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:50.695508', 'step': 8637, 'epoch': 2} {'type': 'loss', 'content': 0.15361621975898743, 'timestamp': '2025-09-10 02:44:50.697807', 'step': 8638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:50.778402', 'step': 8638, 'epoch': 2} {'type': 'loss', 'content': 0.1076299399137497, 'timestamp': '2025-09-10 02:44:50.780780', 'step': 8639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:50.843720', 'step': 8639, 'epoch': 2} {'type': 'loss', 'content': 0.17883650958538055, 'timestamp': '2025-09-10 02:44:50.849766', 'step': 8640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:50.906249', 'step': 8640, 'epoch': 2} {'type': 'loss', 'content': 0.129907488822937, 'timestamp': '2025-09-10 02:44:50.909937', 'step': 8641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:50.967516', 'step': 8641, 'epoch': 2} {'type': 'loss', 'content': 0.12396463006734848, 'timestamp': '2025-09-10 02:44:50.974842', 'step': 8642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:51.036749', 'step': 8642, 'epoch': 2} {'type': 'loss', 'content': 0.16805648803710938, 'timestamp': '2025-09-10 02:44:51.039035', 'step': 8643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:51.097588', 'step': 8643, 'epoch': 2} {'type': 'loss', 'content': 0.0883931890130043, 'timestamp': '2025-09-10 02:44:51.103871', 'step': 8644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:51.160611', 'step': 8644, 'epoch': 2} {'type': 'loss', 'content': 0.060237985104322433, 'timestamp': '2025-09-10 02:44:51.163298', 'step': 8645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:51.222761', 'step': 8645, 'epoch': 2} {'type': 'loss', 'content': 0.11286561191082001, 'timestamp': '2025-09-10 02:44:51.225233', 'step': 8646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:51.283552', 'step': 8646, 'epoch': 2} {'type': 'loss', 'content': 0.20478253066539764, 'timestamp': '2025-09-10 02:44:51.285919', 'step': 8647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:51.346594', 'step': 8647, 'epoch': 2} {'type': 'loss', 'content': 0.19514907896518707, 'timestamp': '2025-09-10 02:44:51.352802', 'step': 8648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:51.429318', 'step': 8648, 'epoch': 2} {'type': 'loss', 'content': 0.07372881472110748, 'timestamp': '2025-09-10 02:44:51.431493', 'step': 8649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:51.500440', 'step': 8649, 'epoch': 2} {'type': 'loss', 'content': 0.12945133447647095, 'timestamp': '2025-09-10 02:44:51.502753', 'step': 8650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:51.566430', 'step': 8650, 'epoch': 2} {'type': 'loss', 'content': 0.1786525994539261, 'timestamp': '2025-09-10 02:44:51.568759', 'step': 8651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:51.626293', 'step': 8651, 'epoch': 2} {'type': 'loss', 'content': 0.060432784259319305, 'timestamp': '2025-09-10 02:44:51.632768', 'step': 8652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:51.688144', 'step': 8652, 'epoch': 2} {'type': 'loss', 'content': 0.1282775104045868, 'timestamp': '2025-09-10 02:44:51.692498', 'step': 8653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:51.752750', 'step': 8653, 'epoch': 2} {'type': 'loss', 'content': 0.15814755856990814, 'timestamp': '2025-09-10 02:44:51.755375', 'step': 8654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:51.809726', 'step': 8654, 'epoch': 2} {'type': 'loss', 'content': 0.14590293169021606, 'timestamp': '2025-09-10 02:44:51.815305', 'step': 8655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:51.870161', 'step': 8655, 'epoch': 2} {'type': 'loss', 'content': 0.15532666444778442, 'timestamp': '2025-09-10 02:44:51.876469', 'step': 8656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:51.935019', 'step': 8656, 'epoch': 2} {'type': 'loss', 'content': 0.15271343290805817, 'timestamp': '2025-09-10 02:44:51.937510', 'step': 8657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:51.995481', 'step': 8657, 'epoch': 2} {'type': 'loss', 'content': 0.12621383368968964, 'timestamp': '2025-09-10 02:44:51.997890', 'step': 8658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:52.055861', 'step': 8658, 'epoch': 2} {'type': 'loss', 'content': 0.10864945501089096, 'timestamp': '2025-09-10 02:44:52.058292', 'step': 8659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:52.113086', 'step': 8659, 'epoch': 2} {'type': 'loss', 'content': 0.10747168213129044, 'timestamp': '2025-09-10 02:44:52.119523', 'step': 8660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:52.173759', 'step': 8660, 'epoch': 2} {'type': 'loss', 'content': 0.07303541153669357, 'timestamp': '2025-09-10 02:44:52.176185', 'step': 8661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:52.231073', 'step': 8661, 'epoch': 2} {'type': 'loss', 'content': 0.09424842894077301, 'timestamp': '2025-09-10 02:44:52.233496', 'step': 8662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:52.288305', 'step': 8662, 'epoch': 2} {'type': 'loss', 'content': 0.12938985228538513, 'timestamp': '2025-09-10 02:44:52.290720', 'step': 8663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:52.344400', 'step': 8663, 'epoch': 2} {'type': 'loss', 'content': 0.12546558678150177, 'timestamp': '2025-09-10 02:44:52.356527', 'step': 8664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:52.409582', 'step': 8664, 'epoch': 2} {'type': 'loss', 'content': 0.10427017509937286, 'timestamp': '2025-09-10 02:44:52.414501', 'step': 8665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:52.470548', 'step': 8665, 'epoch': 2} {'type': 'loss', 'content': 0.18971775472164154, 'timestamp': '2025-09-10 02:44:52.472858', 'step': 8666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:52.531542', 'step': 8666, 'epoch': 2} {'type': 'loss', 'content': 0.1097794622182846, 'timestamp': '2025-09-10 02:44:52.535301', 'step': 8667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:52.591978', 'step': 8667, 'epoch': 2} {'type': 'loss', 'content': 0.09245505183935165, 'timestamp': '2025-09-10 02:44:52.598071', 'step': 8668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:52.662491', 'step': 8668, 'epoch': 2} {'type': 'loss', 'content': 0.09032056480646133, 'timestamp': '2025-09-10 02:44:52.664925', 'step': 8669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:52.736874', 'step': 8669, 'epoch': 2} {'type': 'loss', 'content': 0.19763338565826416, 'timestamp': '2025-09-10 02:44:52.739112', 'step': 8670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:52.795394', 'step': 8670, 'epoch': 2} {'type': 'loss', 'content': 0.11471319943666458, 'timestamp': '2025-09-10 02:44:52.799912', 'step': 8671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:52.858997', 'step': 8671, 'epoch': 2} {'type': 'loss', 'content': 0.16175149381160736, 'timestamp': '2025-09-10 02:44:52.865494', 'step': 8672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:52.926139', 'step': 8672, 'epoch': 2} {'type': 'loss', 'content': 0.10587016493082047, 'timestamp': '2025-09-10 02:44:52.928568', 'step': 8673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:52.991174', 'step': 8673, 'epoch': 2} {'type': 'loss', 'content': 0.10349874198436737, 'timestamp': '2025-09-10 02:44:52.993346', 'step': 8674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:53.053398', 'step': 8674, 'epoch': 2} {'type': 'loss', 'content': 0.13025742769241333, 'timestamp': '2025-09-10 02:44:53.055643', 'step': 8675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:53.117569', 'step': 8675, 'epoch': 2} {'type': 'loss', 'content': 0.11980945616960526, 'timestamp': '2025-09-10 02:44:53.123664', 'step': 8676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:53.180837', 'step': 8676, 'epoch': 2} {'type': 'loss', 'content': 0.14296337962150574, 'timestamp': '2025-09-10 02:44:53.184618', 'step': 8677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:53.242862', 'step': 8677, 'epoch': 2} {'type': 'loss', 'content': 0.14150625467300415, 'timestamp': '2025-09-10 02:44:53.245084', 'step': 8678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:53.314966', 'step': 8678, 'epoch': 2} {'type': 'loss', 'content': 0.15081314742565155, 'timestamp': '2025-09-10 02:44:53.317283', 'step': 8679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:53.370934', 'step': 8679, 'epoch': 2} {'type': 'loss', 'content': 0.10476478189229965, 'timestamp': '2025-09-10 02:44:53.378643', 'step': 8680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:53.433893', 'step': 8680, 'epoch': 2} {'type': 'loss', 'content': 0.1525779515504837, 'timestamp': '2025-09-10 02:44:53.436478', 'step': 8681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:53.495720', 'step': 8681, 'epoch': 2} {'type': 'loss', 'content': 0.09827394038438797, 'timestamp': '2025-09-10 02:44:53.523623', 'step': 8682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:53.589386', 'step': 8682, 'epoch': 2} {'type': 'loss', 'content': 0.14213186502456665, 'timestamp': '2025-09-10 02:44:53.595587', 'step': 8683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:53.662997', 'step': 8683, 'epoch': 2} {'type': 'loss', 'content': 0.09872499108314514, 'timestamp': '2025-09-10 02:44:53.668944', 'step': 8684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:53.724462', 'step': 8684, 'epoch': 2} {'type': 'loss', 'content': 0.17712558805942535, 'timestamp': '2025-09-10 02:44:53.726502', 'step': 8685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:53.782291', 'step': 8685, 'epoch': 2} {'type': 'loss', 'content': 0.25207775831222534, 'timestamp': '2025-09-10 02:44:53.784597', 'step': 8686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:53.840954', 'step': 8686, 'epoch': 2} {'type': 'loss', 'content': 0.19110006093978882, 'timestamp': '2025-09-10 02:44:53.843195', 'step': 8687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:53.898393', 'step': 8687, 'epoch': 2} {'type': 'loss', 'content': 0.15076780319213867, 'timestamp': '2025-09-10 02:44:53.904696', 'step': 8688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:53.964458', 'step': 8688, 'epoch': 2} {'type': 'loss', 'content': 0.16710329055786133, 'timestamp': '2025-09-10 02:44:53.967057', 'step': 8689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:54.020721', 'step': 8689, 'epoch': 2} {'type': 'loss', 'content': 0.19667953252792358, 'timestamp': '2025-09-10 02:44:54.025362', 'step': 8690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:54.085883', 'step': 8690, 'epoch': 2} {'type': 'loss', 'content': 0.21435301005840302, 'timestamp': '2025-09-10 02:44:54.091304', 'step': 8691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:54.153686', 'step': 8691, 'epoch': 2} {'type': 'loss', 'content': 0.15302738547325134, 'timestamp': '2025-09-10 02:44:54.169632', 'step': 8692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:54.228711', 'step': 8692, 'epoch': 2} {'type': 'loss', 'content': 0.16271845996379852, 'timestamp': '2025-09-10 02:44:54.232835', 'step': 8693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:54.292847', 'step': 8693, 'epoch': 2} {'type': 'loss', 'content': 0.1361379325389862, 'timestamp': '2025-09-10 02:44:54.298375', 'step': 8694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:54.360230', 'step': 8694, 'epoch': 2} {'type': 'loss', 'content': 0.09217289835214615, 'timestamp': '2025-09-10 02:44:54.362339', 'step': 8695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:54.416392', 'step': 8695, 'epoch': 2} {'type': 'loss', 'content': 0.09584669768810272, 'timestamp': '2025-09-10 02:44:54.422596', 'step': 8696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:54.479474', 'step': 8696, 'epoch': 2} {'type': 'loss', 'content': 0.18288609385490417, 'timestamp': '2025-09-10 02:44:54.481934', 'step': 8697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:54.560641', 'step': 8697, 'epoch': 2} {'type': 'loss', 'content': 0.3048287034034729, 'timestamp': '2025-09-10 02:44:54.567318', 'step': 8698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:54.645697', 'step': 8698, 'epoch': 2} {'type': 'loss', 'content': 0.09519295394420624, 'timestamp': '2025-09-10 02:44:54.647837', 'step': 8699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:54.763909', 'step': 8699, 'epoch': 2} {'type': 'loss', 'content': 0.19207623600959778, 'timestamp': '2025-09-10 02:44:54.770181', 'step': 8700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:54.839516', 'step': 8700, 'epoch': 2} {'type': 'loss', 'content': 0.19586588442325592, 'timestamp': '2025-09-10 02:44:54.845444', 'step': 8701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:54.915766', 'step': 8701, 'epoch': 2} {'type': 'loss', 'content': 0.13101299107074738, 'timestamp': '2025-09-10 02:44:54.918137', 'step': 8702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:54.978512', 'step': 8702, 'epoch': 2} {'type': 'loss', 'content': 0.12203928828239441, 'timestamp': '2025-09-10 02:44:54.980992', 'step': 8703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:55.040158', 'step': 8703, 'epoch': 2} {'type': 'loss', 'content': 0.09532494097948074, 'timestamp': '2025-09-10 02:44:55.046341', 'step': 8704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:55.101973', 'step': 8704, 'epoch': 2} {'type': 'loss', 'content': 0.1696094274520874, 'timestamp': '2025-09-10 02:44:55.107518', 'step': 8705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:55.164810', 'step': 8705, 'epoch': 2} {'type': 'loss', 'content': 0.09441424906253815, 'timestamp': '2025-09-10 02:44:55.166908', 'step': 8706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:55.221385', 'step': 8706, 'epoch': 2} {'type': 'loss', 'content': 0.16457216441631317, 'timestamp': '2025-09-10 02:44:55.223699', 'step': 8707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:55.278056', 'step': 8707, 'epoch': 2} {'type': 'loss', 'content': 0.08336184918880463, 'timestamp': '2025-09-10 02:44:55.284184', 'step': 8708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:55.339002', 'step': 8708, 'epoch': 2} {'type': 'loss', 'content': 0.17444199323654175, 'timestamp': '2025-09-10 02:44:55.341217', 'step': 8709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:55.395318', 'step': 8709, 'epoch': 2} {'type': 'loss', 'content': 0.11337827146053314, 'timestamp': '2025-09-10 02:44:55.397805', 'step': 8710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:55.452811', 'step': 8710, 'epoch': 2} {'type': 'loss', 'content': 0.10548679530620575, 'timestamp': '2025-09-10 02:44:55.455346', 'step': 8711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:55.508922', 'step': 8711, 'epoch': 2} {'type': 'loss', 'content': 0.12381389737129211, 'timestamp': '2025-09-10 02:44:55.514929', 'step': 8712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:55.567690', 'step': 8712, 'epoch': 2} {'type': 'loss', 'content': 0.12782251834869385, 'timestamp': '2025-09-10 02:44:55.569682', 'step': 8713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:55.623688', 'step': 8713, 'epoch': 2} {'type': 'loss', 'content': 0.10474138706922531, 'timestamp': '2025-09-10 02:44:55.631293', 'step': 8714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:55.688533', 'step': 8714, 'epoch': 2} {'type': 'loss', 'content': 0.1383141577243805, 'timestamp': '2025-09-10 02:44:55.690814', 'step': 8715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:55.744572', 'step': 8715, 'epoch': 2} {'type': 'loss', 'content': 0.13681066036224365, 'timestamp': '2025-09-10 02:44:55.750449', 'step': 8716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:55.803949', 'step': 8716, 'epoch': 2} {'type': 'loss', 'content': 0.11877069622278214, 'timestamp': '2025-09-10 02:44:55.806329', 'step': 8717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:55.860914', 'step': 8717, 'epoch': 2} {'type': 'loss', 'content': 0.1882341057062149, 'timestamp': '2025-09-10 02:44:55.863117', 'step': 8718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:55.917151', 'step': 8718, 'epoch': 2} {'type': 'loss', 'content': 0.16563716530799866, 'timestamp': '2025-09-10 02:44:55.919295', 'step': 8719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:55.977402', 'step': 8719, 'epoch': 2} {'type': 'loss', 'content': 0.19528155028820038, 'timestamp': '2025-09-10 02:44:55.984560', 'step': 8720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:56.069714', 'step': 8720, 'epoch': 2} {'type': 'loss', 'content': 0.14865203201770782, 'timestamp': '2025-09-10 02:44:56.072161', 'step': 8721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:56.129567', 'step': 8721, 'epoch': 2} {'type': 'loss', 'content': 0.20369881391525269, 'timestamp': '2025-09-10 02:44:56.132454', 'step': 8722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:56.193965', 'step': 8722, 'epoch': 2} {'type': 'loss', 'content': 0.13518258929252625, 'timestamp': '2025-09-10 02:44:56.216179', 'step': 8723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:56.275616', 'step': 8723, 'epoch': 2} {'type': 'loss', 'content': 0.07527629286050797, 'timestamp': '2025-09-10 02:44:56.300285', 'step': 8724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:56.362237', 'step': 8724, 'epoch': 2} {'type': 'loss', 'content': 0.13865572214126587, 'timestamp': '2025-09-10 02:44:56.365445', 'step': 8725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:56.419510', 'step': 8725, 'epoch': 2} {'type': 'loss', 'content': 0.13717961311340332, 'timestamp': '2025-09-10 02:44:56.424696', 'step': 8726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:56.481044', 'step': 8726, 'epoch': 2} {'type': 'loss', 'content': 0.1891711950302124, 'timestamp': '2025-09-10 02:44:56.484251', 'step': 8727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:56.539928', 'step': 8727, 'epoch': 2} {'type': 'loss', 'content': 0.03632820397615433, 'timestamp': '2025-09-10 02:44:56.554270', 'step': 8728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:56.615754', 'step': 8728, 'epoch': 2} {'type': 'loss', 'content': 0.11124759167432785, 'timestamp': '2025-09-10 02:44:56.618582', 'step': 8729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:56.672622', 'step': 8729, 'epoch': 2} {'type': 'loss', 'content': 0.2069813758134842, 'timestamp': '2025-09-10 02:44:56.676424', 'step': 8730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:56.746476', 'step': 8730, 'epoch': 2} {'type': 'loss', 'content': 0.16144251823425293, 'timestamp': '2025-09-10 02:44:56.750366', 'step': 8731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:56.820151', 'step': 8731, 'epoch': 2} {'type': 'loss', 'content': 0.10474678874015808, 'timestamp': '2025-09-10 02:44:56.826402', 'step': 8732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:56.884564', 'step': 8732, 'epoch': 2} {'type': 'loss', 'content': 0.1668124943971634, 'timestamp': '2025-09-10 02:44:56.886807', 'step': 8733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:56.941010', 'step': 8733, 'epoch': 2} {'type': 'loss', 'content': 0.12405329942703247, 'timestamp': '2025-09-10 02:44:56.944289', 'step': 8734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:57.002403', 'step': 8734, 'epoch': 2} {'type': 'loss', 'content': 0.06216975301504135, 'timestamp': '2025-09-10 02:44:57.004731', 'step': 8735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:57.058059', 'step': 8735, 'epoch': 2} {'type': 'loss', 'content': 0.13041892647743225, 'timestamp': '2025-09-10 02:44:57.064184', 'step': 8736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:57.120236', 'step': 8736, 'epoch': 2} {'type': 'loss', 'content': 0.18390843272209167, 'timestamp': '2025-09-10 02:44:57.128676', 'step': 8737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:57.183470', 'step': 8737, 'epoch': 2} {'type': 'loss', 'content': 0.14947566390037537, 'timestamp': '2025-09-10 02:44:57.185941', 'step': 8738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:57.240464', 'step': 8738, 'epoch': 2} {'type': 'loss', 'content': 0.10189352929592133, 'timestamp': '2025-09-10 02:44:57.243301', 'step': 8739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:57.301130', 'step': 8739, 'epoch': 2} {'type': 'loss', 'content': 0.1698305308818817, 'timestamp': '2025-09-10 02:44:57.308281', 'step': 8740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:57.371092', 'step': 8740, 'epoch': 2} {'type': 'loss', 'content': 0.11621706187725067, 'timestamp': '2025-09-10 02:44:57.373726', 'step': 8741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:57.430173', 'step': 8741, 'epoch': 2} {'type': 'loss', 'content': 0.09707042574882507, 'timestamp': '2025-09-10 02:44:57.436504', 'step': 8742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:57.497015', 'step': 8742, 'epoch': 2} {'type': 'loss', 'content': 0.1362086534500122, 'timestamp': '2025-09-10 02:44:57.499367', 'step': 8743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:57.559554', 'step': 8743, 'epoch': 2} {'type': 'loss', 'content': 0.11916837841272354, 'timestamp': '2025-09-10 02:44:57.565732', 'step': 8744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:57.620907', 'step': 8744, 'epoch': 2} {'type': 'loss', 'content': 0.1518969088792801, 'timestamp': '2025-09-10 02:44:57.623718', 'step': 8745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:57.680786', 'step': 8745, 'epoch': 2} {'type': 'loss', 'content': 0.16985563933849335, 'timestamp': '2025-09-10 02:44:57.683045', 'step': 8746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:57.740452', 'step': 8746, 'epoch': 2} {'type': 'loss', 'content': 0.20294561982154846, 'timestamp': '2025-09-10 02:44:57.744097', 'step': 8747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:57.800647', 'step': 8747, 'epoch': 2} {'type': 'loss', 'content': 0.1005106195807457, 'timestamp': '2025-09-10 02:44:57.806818', 'step': 8748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:57.865871', 'step': 8748, 'epoch': 2} {'type': 'loss', 'content': 0.1341429203748703, 'timestamp': '2025-09-10 02:44:57.870082', 'step': 8749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:57.923628', 'step': 8749, 'epoch': 2} {'type': 'loss', 'content': 0.14145946502685547, 'timestamp': '2025-09-10 02:44:57.926702', 'step': 8750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:57.986353', 'step': 8750, 'epoch': 2} {'type': 'loss', 'content': 0.17561203241348267, 'timestamp': '2025-09-10 02:44:57.988449', 'step': 8751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:58.049742', 'step': 8751, 'epoch': 2} {'type': 'loss', 'content': 0.0791688933968544, 'timestamp': '2025-09-10 02:44:58.056086', 'step': 8752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:58.109510', 'step': 8752, 'epoch': 2} {'type': 'loss', 'content': 0.11433879286050797, 'timestamp': '2025-09-10 02:44:58.111678', 'step': 8753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:58.167972', 'step': 8753, 'epoch': 2} {'type': 'loss', 'content': 0.09927160292863846, 'timestamp': '2025-09-10 02:44:58.172235', 'step': 8754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:44:58.230202', 'step': 8754, 'epoch': 2} {'type': 'loss', 'content': 0.16453289985656738, 'timestamp': '2025-09-10 02:44:58.232585', 'step': 8755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:58.286074', 'step': 8755, 'epoch': 2} {'type': 'loss', 'content': 0.19507916271686554, 'timestamp': '2025-09-10 02:44:58.294482', 'step': 8756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:58.347978', 'step': 8756, 'epoch': 2} {'type': 'loss', 'content': 0.07892075926065445, 'timestamp': '2025-09-10 02:44:58.350285', 'step': 8757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:58.403484', 'step': 8757, 'epoch': 2} {'type': 'loss', 'content': 0.1291334629058838, 'timestamp': '2025-09-10 02:44:58.405931', 'step': 8758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:58.465601', 'step': 8758, 'epoch': 2} {'type': 'loss', 'content': 0.12398677319288254, 'timestamp': '2025-09-10 02:44:58.467883', 'step': 8759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:58.522373', 'step': 8759, 'epoch': 2} {'type': 'loss', 'content': 0.09372812509536743, 'timestamp': '2025-09-10 02:44:58.528661', 'step': 8760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:58.581981', 'step': 8760, 'epoch': 2} {'type': 'loss', 'content': 0.24364781379699707, 'timestamp': '2025-09-10 02:44:58.585615', 'step': 8761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:58.638868', 'step': 8761, 'epoch': 2} {'type': 'loss', 'content': 0.1549842357635498, 'timestamp': '2025-09-10 02:44:58.641211', 'step': 8762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:58.697160', 'step': 8762, 'epoch': 2} {'type': 'loss', 'content': 0.18811267614364624, 'timestamp': '2025-09-10 02:44:58.699390', 'step': 8763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:58.753551', 'step': 8763, 'epoch': 2} {'type': 'loss', 'content': 0.1263311207294464, 'timestamp': '2025-09-10 02:44:58.759824', 'step': 8764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:58.822542', 'step': 8764, 'epoch': 2} {'type': 'loss', 'content': 0.1250719428062439, 'timestamp': '2025-09-10 02:44:58.825310', 'step': 8765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:58.879032', 'step': 8765, 'epoch': 2} {'type': 'loss', 'content': 0.24330654740333557, 'timestamp': '2025-09-10 02:44:58.881339', 'step': 8766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:58.935337', 'step': 8766, 'epoch': 2} {'type': 'loss', 'content': 0.14199715852737427, 'timestamp': '2025-09-10 02:44:58.937583', 'step': 8767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:58.990751', 'step': 8767, 'epoch': 2} {'type': 'loss', 'content': 0.16574344038963318, 'timestamp': '2025-09-10 02:44:58.998999', 'step': 8768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:59.052936', 'step': 8768, 'epoch': 2} {'type': 'loss', 'content': 0.09290412813425064, 'timestamp': '2025-09-10 02:44:59.055367', 'step': 8769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:59.108685', 'step': 8769, 'epoch': 2} {'type': 'loss', 'content': 0.07178687304258347, 'timestamp': '2025-09-10 02:44:59.110809', 'step': 8770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:44:59.164418', 'step': 8770, 'epoch': 2} {'type': 'loss', 'content': 0.12744523584842682, 'timestamp': '2025-09-10 02:44:59.166743', 'step': 8771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:59.220701', 'step': 8771, 'epoch': 2} {'type': 'loss', 'content': 0.1048244759440422, 'timestamp': '2025-09-10 02:44:59.227307', 'step': 8772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:59.279780', 'step': 8772, 'epoch': 2} {'type': 'loss', 'content': 0.14335162937641144, 'timestamp': '2025-09-10 02:44:59.281952', 'step': 8773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:59.335616', 'step': 8773, 'epoch': 2} {'type': 'loss', 'content': 0.14905408024787903, 'timestamp': '2025-09-10 02:44:59.337993', 'step': 8774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:59.391900', 'step': 8774, 'epoch': 2} {'type': 'loss', 'content': 0.10639987885951996, 'timestamp': '2025-09-10 02:44:59.394165', 'step': 8775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:59.448178', 'step': 8775, 'epoch': 2} {'type': 'loss', 'content': 0.1302802562713623, 'timestamp': '2025-09-10 02:44:59.454392', 'step': 8776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:59.507522', 'step': 8776, 'epoch': 2} {'type': 'loss', 'content': 0.08167201280593872, 'timestamp': '2025-09-10 02:44:59.509745', 'step': 8777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:59.563499', 'step': 8777, 'epoch': 2} {'type': 'loss', 'content': 0.10034187138080597, 'timestamp': '2025-09-10 02:44:59.565692', 'step': 8778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:59.619569', 'step': 8778, 'epoch': 2} {'type': 'loss', 'content': 0.14832282066345215, 'timestamp': '2025-09-10 02:44:59.621872', 'step': 8779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:59.678330', 'step': 8779, 'epoch': 2} {'type': 'loss', 'content': 0.056032467633485794, 'timestamp': '2025-09-10 02:44:59.684710', 'step': 8780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:59.738393', 'step': 8780, 'epoch': 2} {'type': 'loss', 'content': 0.20368365943431854, 'timestamp': '2025-09-10 02:44:59.741908', 'step': 8781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:59.795619', 'step': 8781, 'epoch': 2} {'type': 'loss', 'content': 0.10592401027679443, 'timestamp': '2025-09-10 02:44:59.797986', 'step': 8782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:44:59.851534', 'step': 8782, 'epoch': 2} {'type': 'loss', 'content': 0.09627743810415268, 'timestamp': '2025-09-10 02:44:59.853941', 'step': 8783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:59.906694', 'step': 8783, 'epoch': 2} {'type': 'loss', 'content': 0.08944623172283173, 'timestamp': '2025-09-10 02:44:59.913357', 'step': 8784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:44:59.967461', 'step': 8784, 'epoch': 2} {'type': 'loss', 'content': 0.22337037324905396, 'timestamp': '2025-09-10 02:44:59.969667', 'step': 8785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:00.022920', 'step': 8785, 'epoch': 2} {'type': 'loss', 'content': 0.06696141511201859, 'timestamp': '2025-09-10 02:45:00.025194', 'step': 8786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:00.078522', 'step': 8786, 'epoch': 2} {'type': 'loss', 'content': 0.1432207226753235, 'timestamp': '2025-09-10 02:45:00.085884', 'step': 8787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:00.149446', 'step': 8787, 'epoch': 2} {'type': 'loss', 'content': 0.11615972220897675, 'timestamp': '2025-09-10 02:45:00.155748', 'step': 8788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:00.211298', 'step': 8788, 'epoch': 2} {'type': 'loss', 'content': 0.12899455428123474, 'timestamp': '2025-09-10 02:45:00.213716', 'step': 8789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:00.275815', 'step': 8789, 'epoch': 2} {'type': 'loss', 'content': 0.07214970141649246, 'timestamp': '2025-09-10 02:45:00.278192', 'step': 8790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:00.331566', 'step': 8790, 'epoch': 2} {'type': 'loss', 'content': 0.10397873818874359, 'timestamp': '2025-09-10 02:45:00.336910', 'step': 8791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:00.389963', 'step': 8791, 'epoch': 2} {'type': 'loss', 'content': 0.22047197818756104, 'timestamp': '2025-09-10 02:45:00.396080', 'step': 8792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:00.449594', 'step': 8792, 'epoch': 2} {'type': 'loss', 'content': 0.14156925678253174, 'timestamp': '2025-09-10 02:45:00.454951', 'step': 8793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:45:00.515656', 'step': 8793, 'epoch': 2} {'type': 'loss', 'content': 0.18511538207530975, 'timestamp': '2025-09-10 02:45:00.517932', 'step': 8794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:00.571974', 'step': 8794, 'epoch': 2} {'type': 'loss', 'content': 0.16711148619651794, 'timestamp': '2025-09-10 02:45:00.575893', 'step': 8795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:00.631553', 'step': 8795, 'epoch': 2} {'type': 'loss', 'content': 0.13209941983222961, 'timestamp': '2025-09-10 02:45:00.638262', 'step': 8796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:00.695167', 'step': 8796, 'epoch': 2} {'type': 'loss', 'content': 0.07316390424966812, 'timestamp': '2025-09-10 02:45:00.697641', 'step': 8797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:00.755721', 'step': 8797, 'epoch': 2} {'type': 'loss', 'content': 0.06733438372612, 'timestamp': '2025-09-10 02:45:00.758379', 'step': 8798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:00.813568', 'step': 8798, 'epoch': 2} {'type': 'loss', 'content': 0.13064725697040558, 'timestamp': '2025-09-10 02:45:00.816371', 'step': 8799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:00.870011', 'step': 8799, 'epoch': 2} {'type': 'loss', 'content': 0.06650230288505554, 'timestamp': '2025-09-10 02:45:00.876218', 'step': 8800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:00.931517', 'step': 8800, 'epoch': 2} {'type': 'loss', 'content': 0.18417766690254211, 'timestamp': '2025-09-10 02:45:00.934366', 'step': 8801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:00.988459', 'step': 8801, 'epoch': 2} {'type': 'loss', 'content': 0.12986484169960022, 'timestamp': '2025-09-10 02:45:00.990748', 'step': 8802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:01.044189', 'step': 8802, 'epoch': 2} {'type': 'loss', 'content': 0.08485358208417892, 'timestamp': '2025-09-10 02:45:01.046711', 'step': 8803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:01.100741', 'step': 8803, 'epoch': 2} {'type': 'loss', 'content': 0.07167575508356094, 'timestamp': '2025-09-10 02:45:01.107437', 'step': 8804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:01.163216', 'step': 8804, 'epoch': 2} {'type': 'loss', 'content': 0.19753549993038177, 'timestamp': '2025-09-10 02:45:01.165605', 'step': 8805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:01.221524', 'step': 8805, 'epoch': 2} {'type': 'loss', 'content': 0.1297302395105362, 'timestamp': '2025-09-10 02:45:01.224131', 'step': 8806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:01.280386', 'step': 8806, 'epoch': 2} {'type': 'loss', 'content': 0.08721216022968292, 'timestamp': '2025-09-10 02:45:01.282807', 'step': 8807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:01.336179', 'step': 8807, 'epoch': 2} {'type': 'loss', 'content': 0.18960677087306976, 'timestamp': '2025-09-10 02:45:01.342611', 'step': 8808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:01.396764', 'step': 8808, 'epoch': 2} {'type': 'loss', 'content': 0.18703925609588623, 'timestamp': '2025-09-10 02:45:01.399184', 'step': 8809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:01.454949', 'step': 8809, 'epoch': 2} {'type': 'loss', 'content': 0.11032051593065262, 'timestamp': '2025-09-10 02:45:01.457386', 'step': 8810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:01.512705', 'step': 8810, 'epoch': 2} {'type': 'loss', 'content': 0.12440662086009979, 'timestamp': '2025-09-10 02:45:01.515088', 'step': 8811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:01.571465', 'step': 8811, 'epoch': 2} {'type': 'loss', 'content': 0.08145556598901749, 'timestamp': '2025-09-10 02:45:01.577763', 'step': 8812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:01.633023', 'step': 8812, 'epoch': 2} {'type': 'loss', 'content': 0.1654747575521469, 'timestamp': '2025-09-10 02:45:01.635289', 'step': 8813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:01.697418', 'step': 8813, 'epoch': 2} {'type': 'loss', 'content': 0.12126950919628143, 'timestamp': '2025-09-10 02:45:01.699661', 'step': 8814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:01.753507', 'step': 8814, 'epoch': 2} {'type': 'loss', 'content': 0.21782071888446808, 'timestamp': '2025-09-10 02:45:01.757969', 'step': 8815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:01.813971', 'step': 8815, 'epoch': 2} {'type': 'loss', 'content': 0.08559735864400864, 'timestamp': '2025-09-10 02:45:01.823594', 'step': 8816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:01.878408', 'step': 8816, 'epoch': 2} {'type': 'loss', 'content': 0.171634241938591, 'timestamp': '2025-09-10 02:45:01.880598', 'step': 8817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:01.935384', 'step': 8817, 'epoch': 2} {'type': 'loss', 'content': 0.19597911834716797, 'timestamp': '2025-09-10 02:45:01.937591', 'step': 8818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:01.991730', 'step': 8818, 'epoch': 2} {'type': 'loss', 'content': 0.2025502622127533, 'timestamp': '2025-09-10 02:45:01.994209', 'step': 8819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:02.052601', 'step': 8819, 'epoch': 2} {'type': 'loss', 'content': 0.18535804748535156, 'timestamp': '2025-09-10 02:45:02.058753', 'step': 8820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:02.111928', 'step': 8820, 'epoch': 2} {'type': 'loss', 'content': 0.13436336815357208, 'timestamp': '2025-09-10 02:45:02.114217', 'step': 8821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:02.169123', 'step': 8821, 'epoch': 2} {'type': 'loss', 'content': 0.06890052556991577, 'timestamp': '2025-09-10 02:45:02.171310', 'step': 8822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:02.225380', 'step': 8822, 'epoch': 2} {'type': 'loss', 'content': 0.10712040215730667, 'timestamp': '2025-09-10 02:45:02.229066', 'step': 8823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:02.296716', 'step': 8823, 'epoch': 2} {'type': 'loss', 'content': 0.16505055129528046, 'timestamp': '2025-09-10 02:45:02.303180', 'step': 8824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:02.357216', 'step': 8824, 'epoch': 2} {'type': 'loss', 'content': 0.09522642940282822, 'timestamp': '2025-09-10 02:45:02.359606', 'step': 8825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:02.413532', 'step': 8825, 'epoch': 2} {'type': 'loss', 'content': 0.10462313890457153, 'timestamp': '2025-09-10 02:45:02.418872', 'step': 8826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:02.475174', 'step': 8826, 'epoch': 2} {'type': 'loss', 'content': 0.15062130987644196, 'timestamp': '2025-09-10 02:45:02.477757', 'step': 8827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:02.535932', 'step': 8827, 'epoch': 2} {'type': 'loss', 'content': 0.06421336531639099, 'timestamp': '2025-09-10 02:45:02.542123', 'step': 8828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:02.596559', 'step': 8828, 'epoch': 2} {'type': 'loss', 'content': 0.11493809521198273, 'timestamp': '2025-09-10 02:45:02.598794', 'step': 8829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:02.654437', 'step': 8829, 'epoch': 2} {'type': 'loss', 'content': 0.13884997367858887, 'timestamp': '2025-09-10 02:45:02.656645', 'step': 8830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:02.717982', 'step': 8830, 'epoch': 2} {'type': 'loss', 'content': 0.17371194064617157, 'timestamp': '2025-09-10 02:45:02.720447', 'step': 8831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:02.774110', 'step': 8831, 'epoch': 2} {'type': 'loss', 'content': 0.09512510895729065, 'timestamp': '2025-09-10 02:45:02.780449', 'step': 8832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:02.835802', 'step': 8832, 'epoch': 2} {'type': 'loss', 'content': 0.03646228089928627, 'timestamp': '2025-09-10 02:45:02.838215', 'step': 8833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:02.891681', 'step': 8833, 'epoch': 2} {'type': 'loss', 'content': 0.17691370844841003, 'timestamp': '2025-09-10 02:45:02.896506', 'step': 8834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:02.955513', 'step': 8834, 'epoch': 2} {'type': 'loss', 'content': 0.12656161189079285, 'timestamp': '2025-09-10 02:45:02.959579', 'step': 8835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:03.015907', 'step': 8835, 'epoch': 2} {'type': 'loss', 'content': 0.23677605390548706, 'timestamp': '2025-09-10 02:45:03.023410', 'step': 8836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:03.085180', 'step': 8836, 'epoch': 2} {'type': 'loss', 'content': 0.11757417023181915, 'timestamp': '2025-09-10 02:45:03.087944', 'step': 8837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:03.150225', 'step': 8837, 'epoch': 2} {'type': 'loss', 'content': 0.21239836513996124, 'timestamp': '2025-09-10 02:45:03.152607', 'step': 8838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:03.212526', 'step': 8838, 'epoch': 2} {'type': 'loss', 'content': 0.12002672255039215, 'timestamp': '2025-09-10 02:45:03.215364', 'step': 8839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:03.271587', 'step': 8839, 'epoch': 2} {'type': 'loss', 'content': 0.11196029186248779, 'timestamp': '2025-09-10 02:45:03.278164', 'step': 8840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:03.332293', 'step': 8840, 'epoch': 2} {'type': 'loss', 'content': 0.09949563443660736, 'timestamp': '2025-09-10 02:45:03.335076', 'step': 8841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:03.391987', 'step': 8841, 'epoch': 2} {'type': 'loss', 'content': 0.07467108219861984, 'timestamp': '2025-09-10 02:45:03.394414', 'step': 8842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:03.447809', 'step': 8842, 'epoch': 2} {'type': 'loss', 'content': 0.12953627109527588, 'timestamp': '2025-09-10 02:45:03.450536', 'step': 8843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:03.504493', 'step': 8843, 'epoch': 2} {'type': 'loss', 'content': 0.08890210837125778, 'timestamp': '2025-09-10 02:45:03.510668', 'step': 8844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:03.564431', 'step': 8844, 'epoch': 2} {'type': 'loss', 'content': 0.14298273622989655, 'timestamp': '2025-09-10 02:45:03.566597', 'step': 8845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:03.622999', 'step': 8845, 'epoch': 2} {'type': 'loss', 'content': 0.14266392588615417, 'timestamp': '2025-09-10 02:45:03.625186', 'step': 8846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:03.678312', 'step': 8846, 'epoch': 2} {'type': 'loss', 'content': 0.09112387895584106, 'timestamp': '2025-09-10 02:45:03.680691', 'step': 8847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:03.734740', 'step': 8847, 'epoch': 2} {'type': 'loss', 'content': 0.10770687460899353, 'timestamp': '2025-09-10 02:45:03.741860', 'step': 8848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:03.801903', 'step': 8848, 'epoch': 2} {'type': 'loss', 'content': 0.1780533641576767, 'timestamp': '2025-09-10 02:45:03.804169', 'step': 8849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:03.867496', 'step': 8849, 'epoch': 2} {'type': 'loss', 'content': 0.13987115025520325, 'timestamp': '2025-09-10 02:45:03.869945', 'step': 8850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:03.934066', 'step': 8850, 'epoch': 2} {'type': 'loss', 'content': 0.1603623777627945, 'timestamp': '2025-09-10 02:45:03.936301', 'step': 8851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:45:03.994796', 'step': 8851, 'epoch': 2} {'type': 'loss', 'content': 0.047051023691892624, 'timestamp': '2025-09-10 02:45:04.001190', 'step': 8852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:04.054274', 'step': 8852, 'epoch': 2} {'type': 'loss', 'content': 0.1622444987297058, 'timestamp': '2025-09-10 02:45:04.056518', 'step': 8853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:04.109892', 'step': 8853, 'epoch': 2} {'type': 'loss', 'content': 0.14121422171592712, 'timestamp': '2025-09-10 02:45:04.112632', 'step': 8854, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:45:17.017796', 'step': 8854, 'epoch': 2} {'type': 'pplx', 'content': 13768.250130148726, 'timestamp': '2025-09-10 02:45:17.020988', 'step': 8854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:17.076314', 'step': 8854, 'epoch': 2} {'type': 'loss', 'content': 0.1063714399933815, 'timestamp': '2025-09-10 02:45:17.078639', 'step': 8855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:17.135100', 'step': 8855, 'epoch': 2} {'type': 'loss', 'content': 0.19243454933166504, 'timestamp': '2025-09-10 02:45:17.141699', 'step': 8856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:17.195473', 'step': 8856, 'epoch': 2} {'type': 'loss', 'content': 0.14216281473636627, 'timestamp': '2025-09-10 02:45:17.197762', 'step': 8857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:17.252460', 'step': 8857, 'epoch': 2} {'type': 'loss', 'content': 0.09943608939647675, 'timestamp': '2025-09-10 02:45:17.254721', 'step': 8858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:17.309110', 'step': 8858, 'epoch': 2} {'type': 'loss', 'content': 0.1650400012731552, 'timestamp': '2025-09-10 02:45:17.311428', 'step': 8859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:17.364930', 'step': 8859, 'epoch': 2} {'type': 'loss', 'content': 0.11154879629611969, 'timestamp': '2025-09-10 02:45:17.371205', 'step': 8860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:17.424691', 'step': 8860, 'epoch': 2} {'type': 'loss', 'content': 0.16357389092445374, 'timestamp': '2025-09-10 02:45:17.426995', 'step': 8861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:17.481368', 'step': 8861, 'epoch': 2} {'type': 'loss', 'content': 0.11599495261907578, 'timestamp': '2025-09-10 02:45:17.483607', 'step': 8862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:45:17.537229', 'step': 8862, 'epoch': 2} {'type': 'loss', 'content': 0.07374356687068939, 'timestamp': '2025-09-10 02:45:17.539493', 'step': 8863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:17.593564', 'step': 8863, 'epoch': 2} {'type': 'loss', 'content': 0.13423359394073486, 'timestamp': '2025-09-10 02:45:17.599876', 'step': 8864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:17.652970', 'step': 8864, 'epoch': 2} {'type': 'loss', 'content': 0.14982551336288452, 'timestamp': '2025-09-10 02:45:17.655526', 'step': 8865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:17.709409', 'step': 8865, 'epoch': 2} {'type': 'loss', 'content': 0.10764575004577637, 'timestamp': '2025-09-10 02:45:17.711674', 'step': 8866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:17.765729', 'step': 8866, 'epoch': 2} {'type': 'loss', 'content': 0.13719163835048676, 'timestamp': '2025-09-10 02:45:17.767962', 'step': 8867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:17.822265', 'step': 8867, 'epoch': 2} {'type': 'loss', 'content': 0.21968352794647217, 'timestamp': '2025-09-10 02:45:17.828394', 'step': 8868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:17.882285', 'step': 8868, 'epoch': 2} {'type': 'loss', 'content': 0.09432368725538254, 'timestamp': '2025-09-10 02:45:17.884580', 'step': 8869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:17.937778', 'step': 8869, 'epoch': 2} {'type': 'loss', 'content': 0.10782261937856674, 'timestamp': '2025-09-10 02:45:17.940093', 'step': 8870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:17.993767', 'step': 8870, 'epoch': 2} {'type': 'loss', 'content': 0.13141342997550964, 'timestamp': '2025-09-10 02:45:17.996101', 'step': 8871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:18.050117', 'step': 8871, 'epoch': 2} {'type': 'loss', 'content': 0.0865023210644722, 'timestamp': '2025-09-10 02:45:18.056215', 'step': 8872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:18.109004', 'step': 8872, 'epoch': 2} {'type': 'loss', 'content': 0.18554003536701202, 'timestamp': '2025-09-10 02:45:18.112688', 'step': 8873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:18.166309', 'step': 8873, 'epoch': 2} {'type': 'loss', 'content': 0.12202711403369904, 'timestamp': '2025-09-10 02:45:18.168605', 'step': 8874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:18.222010', 'step': 8874, 'epoch': 2} {'type': 'loss', 'content': 0.14373302459716797, 'timestamp': '2025-09-10 02:45:18.224307', 'step': 8875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:18.280930', 'step': 8875, 'epoch': 2} {'type': 'loss', 'content': 0.08984464406967163, 'timestamp': '2025-09-10 02:45:18.287397', 'step': 8876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:18.342082', 'step': 8876, 'epoch': 2} {'type': 'loss', 'content': 0.22961273789405823, 'timestamp': '2025-09-10 02:45:18.344407', 'step': 8877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:18.402058', 'step': 8877, 'epoch': 2} {'type': 'loss', 'content': 0.16675366461277008, 'timestamp': '2025-09-10 02:45:18.404570', 'step': 8878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:18.461480', 'step': 8878, 'epoch': 2} {'type': 'loss', 'content': 0.1608743965625763, 'timestamp': '2025-09-10 02:45:18.463890', 'step': 8879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:45:18.521458', 'step': 8879, 'epoch': 2} {'type': 'loss', 'content': 0.21814922988414764, 'timestamp': '2025-09-10 02:45:18.528251', 'step': 8880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:18.585871', 'step': 8880, 'epoch': 2} {'type': 'loss', 'content': 0.1077762320637703, 'timestamp': '2025-09-10 02:45:18.588154', 'step': 8881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:18.645601', 'step': 8881, 'epoch': 2} {'type': 'loss', 'content': 0.17482741177082062, 'timestamp': '2025-09-10 02:45:18.647946', 'step': 8882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:18.704663', 'step': 8882, 'epoch': 2} {'type': 'loss', 'content': 0.11010363698005676, 'timestamp': '2025-09-10 02:45:18.707075', 'step': 8883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:18.764606', 'step': 8883, 'epoch': 2} {'type': 'loss', 'content': 0.17672203481197357, 'timestamp': '2025-09-10 02:45:18.771275', 'step': 8884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:18.827030', 'step': 8884, 'epoch': 2} {'type': 'loss', 'content': 0.12487286329269409, 'timestamp': '2025-09-10 02:45:18.829141', 'step': 8885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:18.882760', 'step': 8885, 'epoch': 2} {'type': 'loss', 'content': 0.23167447745800018, 'timestamp': '2025-09-10 02:45:18.884862', 'step': 8886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:18.938523', 'step': 8886, 'epoch': 2} {'type': 'loss', 'content': 0.13252247869968414, 'timestamp': '2025-09-10 02:45:18.940802', 'step': 8887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:18.994967', 'step': 8887, 'epoch': 2} {'type': 'loss', 'content': 0.11026564240455627, 'timestamp': '2025-09-10 02:45:19.001259', 'step': 8888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:19.056307', 'step': 8888, 'epoch': 2} {'type': 'loss', 'content': 0.08872178941965103, 'timestamp': '2025-09-10 02:45:19.058781', 'step': 8889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:19.113650', 'step': 8889, 'epoch': 2} {'type': 'loss', 'content': 0.17092956602573395, 'timestamp': '2025-09-10 02:45:19.115926', 'step': 8890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:19.169551', 'step': 8890, 'epoch': 2} {'type': 'loss', 'content': 0.14050441980361938, 'timestamp': '2025-09-10 02:45:19.171698', 'step': 8891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:45:19.225711', 'step': 8891, 'epoch': 2} {'type': 'loss', 'content': 0.18427829444408417, 'timestamp': '2025-09-10 02:45:19.232109', 'step': 8892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:19.285417', 'step': 8892, 'epoch': 2} {'type': 'loss', 'content': 0.08229036629199982, 'timestamp': '2025-09-10 02:45:19.287827', 'step': 8893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:19.341647', 'step': 8893, 'epoch': 2} {'type': 'loss', 'content': 0.17578142881393433, 'timestamp': '2025-09-10 02:45:19.344051', 'step': 8894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:19.398651', 'step': 8894, 'epoch': 2} {'type': 'loss', 'content': 0.09124687314033508, 'timestamp': '2025-09-10 02:45:19.400955', 'step': 8895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:19.454769', 'step': 8895, 'epoch': 2} {'type': 'loss', 'content': 0.08587411046028137, 'timestamp': '2025-09-10 02:45:19.460961', 'step': 8896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:19.514684', 'step': 8896, 'epoch': 2} {'type': 'loss', 'content': 0.1479228287935257, 'timestamp': '2025-09-10 02:45:19.516945', 'step': 8897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:19.570900', 'step': 8897, 'epoch': 2} {'type': 'loss', 'content': 0.12704385817050934, 'timestamp': '2025-09-10 02:45:19.573224', 'step': 8898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:19.634591', 'step': 8898, 'epoch': 2} {'type': 'loss', 'content': 0.12633274495601654, 'timestamp': '2025-09-10 02:45:19.637046', 'step': 8899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:19.697486', 'step': 8899, 'epoch': 2} {'type': 'loss', 'content': 0.1379895657300949, 'timestamp': '2025-09-10 02:45:19.704858', 'step': 8900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:19.766245', 'step': 8900, 'epoch': 2} {'type': 'loss', 'content': 0.08762601763010025, 'timestamp': '2025-09-10 02:45:19.768850', 'step': 8901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:19.831108', 'step': 8901, 'epoch': 2} {'type': 'loss', 'content': 0.18006092309951782, 'timestamp': '2025-09-10 02:45:19.833366', 'step': 8902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:19.893751', 'step': 8902, 'epoch': 2} {'type': 'loss', 'content': 0.13269619643688202, 'timestamp': '2025-09-10 02:45:19.896149', 'step': 8903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:19.954225', 'step': 8903, 'epoch': 2} {'type': 'loss', 'content': 0.18506911396980286, 'timestamp': '2025-09-10 02:45:19.960862', 'step': 8904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:20.015673', 'step': 8904, 'epoch': 2} {'type': 'loss', 'content': 0.2819826006889343, 'timestamp': '2025-09-10 02:45:20.017919', 'step': 8905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:20.071165', 'step': 8905, 'epoch': 2} {'type': 'loss', 'content': 0.08731016516685486, 'timestamp': '2025-09-10 02:45:20.073432', 'step': 8906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:20.130532', 'step': 8906, 'epoch': 2} {'type': 'loss', 'content': 0.13182611763477325, 'timestamp': '2025-09-10 02:45:20.132931', 'step': 8907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:20.188477', 'step': 8907, 'epoch': 2} {'type': 'loss', 'content': 0.19503507018089294, 'timestamp': '2025-09-10 02:45:20.194790', 'step': 8908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:20.247759', 'step': 8908, 'epoch': 2} {'type': 'loss', 'content': 0.11310461163520813, 'timestamp': '2025-09-10 02:45:20.250396', 'step': 8909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:20.305032', 'step': 8909, 'epoch': 2} {'type': 'loss', 'content': 0.15088243782520294, 'timestamp': '2025-09-10 02:45:20.307579', 'step': 8910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:20.361673', 'step': 8910, 'epoch': 2} {'type': 'loss', 'content': 0.19945134222507477, 'timestamp': '2025-09-10 02:45:20.364483', 'step': 8911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:20.418680', 'step': 8911, 'epoch': 2} {'type': 'loss', 'content': 0.10501636564731598, 'timestamp': '2025-09-10 02:45:20.424671', 'step': 8912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:20.477556', 'step': 8912, 'epoch': 2} {'type': 'loss', 'content': 0.11770278960466385, 'timestamp': '2025-09-10 02:45:20.479756', 'step': 8913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:20.533545', 'step': 8913, 'epoch': 2} {'type': 'loss', 'content': 0.08865243941545486, 'timestamp': '2025-09-10 02:45:20.535804', 'step': 8914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:20.589541', 'step': 8914, 'epoch': 2} {'type': 'loss', 'content': 0.2262403666973114, 'timestamp': '2025-09-10 02:45:20.591822', 'step': 8915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:20.645378', 'step': 8915, 'epoch': 2} {'type': 'loss', 'content': 0.06405766308307648, 'timestamp': '2025-09-10 02:45:20.651394', 'step': 8916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:20.705359', 'step': 8916, 'epoch': 2} {'type': 'loss', 'content': 0.06315258890390396, 'timestamp': '2025-09-10 02:45:20.708576', 'step': 8917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:20.761822', 'step': 8917, 'epoch': 2} {'type': 'loss', 'content': 0.07978300750255585, 'timestamp': '2025-09-10 02:45:20.764185', 'step': 8918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:20.818080', 'step': 8918, 'epoch': 2} {'type': 'loss', 'content': 0.14042317867279053, 'timestamp': '2025-09-10 02:45:20.820438', 'step': 8919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:20.874731', 'step': 8919, 'epoch': 2} {'type': 'loss', 'content': 0.12220320850610733, 'timestamp': '2025-09-10 02:45:20.880641', 'step': 8920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:20.932938', 'step': 8920, 'epoch': 2} {'type': 'loss', 'content': 0.10558375716209412, 'timestamp': '2025-09-10 02:45:20.936437', 'step': 8921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:20.991246', 'step': 8921, 'epoch': 2} {'type': 'loss', 'content': 0.1576298028230667, 'timestamp': '2025-09-10 02:45:20.993674', 'step': 8922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:21.049026', 'step': 8922, 'epoch': 2} {'type': 'loss', 'content': 0.24508821964263916, 'timestamp': '2025-09-10 02:45:21.051362', 'step': 8923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:21.105333', 'step': 8923, 'epoch': 2} {'type': 'loss', 'content': 0.16626940667629242, 'timestamp': '2025-09-10 02:45:21.111218', 'step': 8924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:21.169873', 'step': 8924, 'epoch': 2} {'type': 'loss', 'content': 0.14966468513011932, 'timestamp': '2025-09-10 02:45:21.173588', 'step': 8925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:21.229326', 'step': 8925, 'epoch': 2} {'type': 'loss', 'content': 0.11278431862592697, 'timestamp': '2025-09-10 02:45:21.232661', 'step': 8926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:21.292151', 'step': 8926, 'epoch': 2} {'type': 'loss', 'content': 0.07521558552980423, 'timestamp': '2025-09-10 02:45:21.294379', 'step': 8927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:21.347970', 'step': 8927, 'epoch': 2} {'type': 'loss', 'content': 0.22668446600437164, 'timestamp': '2025-09-10 02:45:21.359032', 'step': 8928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:21.412639', 'step': 8928, 'epoch': 2} {'type': 'loss', 'content': 0.22751617431640625, 'timestamp': '2025-09-10 02:45:21.414876', 'step': 8929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:21.469205', 'step': 8929, 'epoch': 2} {'type': 'loss', 'content': 0.18000803887844086, 'timestamp': '2025-09-10 02:45:21.471539', 'step': 8930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:21.533829', 'step': 8930, 'epoch': 2} {'type': 'loss', 'content': 0.10983602702617645, 'timestamp': '2025-09-10 02:45:21.536153', 'step': 8931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:21.591781', 'step': 8931, 'epoch': 2} {'type': 'loss', 'content': 0.08792078495025635, 'timestamp': '2025-09-10 02:45:21.597731', 'step': 8932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:21.650876', 'step': 8932, 'epoch': 2} {'type': 'loss', 'content': 0.08175943791866302, 'timestamp': '2025-09-10 02:45:21.653178', 'step': 8933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:21.709819', 'step': 8933, 'epoch': 2} {'type': 'loss', 'content': 0.1456519067287445, 'timestamp': '2025-09-10 02:45:21.712218', 'step': 8934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:21.767029', 'step': 8934, 'epoch': 2} {'type': 'loss', 'content': 0.1057511493563652, 'timestamp': '2025-09-10 02:45:21.769382', 'step': 8935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:21.824661', 'step': 8935, 'epoch': 2} {'type': 'loss', 'content': 0.07339756190776825, 'timestamp': '2025-09-10 02:45:21.830934', 'step': 8936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:21.885523', 'step': 8936, 'epoch': 2} {'type': 'loss', 'content': 0.19623436033725739, 'timestamp': '2025-09-10 02:45:21.887817', 'step': 8937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:21.940880', 'step': 8937, 'epoch': 2} {'type': 'loss', 'content': 0.048996519297361374, 'timestamp': '2025-09-10 02:45:21.943199', 'step': 8938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:21.998439', 'step': 8938, 'epoch': 2} {'type': 'loss', 'content': 0.19641618430614471, 'timestamp': '2025-09-10 02:45:22.002258', 'step': 8939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:22.057061', 'step': 8939, 'epoch': 2} {'type': 'loss', 'content': 0.13463468849658966, 'timestamp': '2025-09-10 02:45:22.062947', 'step': 8940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:22.115788', 'step': 8940, 'epoch': 2} {'type': 'loss', 'content': 0.08409224450588226, 'timestamp': '2025-09-10 02:45:22.120211', 'step': 8941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:22.182221', 'step': 8941, 'epoch': 2} {'type': 'loss', 'content': 0.11022575199604034, 'timestamp': '2025-09-10 02:45:22.184472', 'step': 8942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:22.237659', 'step': 8942, 'epoch': 2} {'type': 'loss', 'content': 0.1092306450009346, 'timestamp': '2025-09-10 02:45:22.240398', 'step': 8943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:22.298154', 'step': 8943, 'epoch': 2} {'type': 'loss', 'content': 0.10237280279397964, 'timestamp': '2025-09-10 02:45:22.304263', 'step': 8944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:22.358082', 'step': 8944, 'epoch': 2} {'type': 'loss', 'content': 0.1496468484401703, 'timestamp': '2025-09-10 02:45:22.362509', 'step': 8945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:22.420773', 'step': 8945, 'epoch': 2} {'type': 'loss', 'content': 0.1368308663368225, 'timestamp': '2025-09-10 02:45:22.424521', 'step': 8946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:22.479346', 'step': 8946, 'epoch': 2} {'type': 'loss', 'content': 0.09834261238574982, 'timestamp': '2025-09-10 02:45:22.481483', 'step': 8947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:22.535665', 'step': 8947, 'epoch': 2} {'type': 'loss', 'content': 0.14802171289920807, 'timestamp': '2025-09-10 02:45:22.541582', 'step': 8948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:22.594893', 'step': 8948, 'epoch': 2} {'type': 'loss', 'content': 0.15995542705059052, 'timestamp': '2025-09-10 02:45:22.599440', 'step': 8949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:22.652542', 'step': 8949, 'epoch': 2} {'type': 'loss', 'content': 0.1665925830602646, 'timestamp': '2025-09-10 02:45:22.657752', 'step': 8950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:22.714092', 'step': 8950, 'epoch': 2} {'type': 'loss', 'content': 0.13818159699440002, 'timestamp': '2025-09-10 02:45:22.716423', 'step': 8951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:22.771081', 'step': 8951, 'epoch': 2} {'type': 'loss', 'content': 0.10275711864233017, 'timestamp': '2025-09-10 02:45:22.776900', 'step': 8952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:22.831255', 'step': 8952, 'epoch': 2} {'type': 'loss', 'content': 0.1817893236875534, 'timestamp': '2025-09-10 02:45:22.833481', 'step': 8953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:22.890824', 'step': 8953, 'epoch': 2} {'type': 'loss', 'content': 0.1118585467338562, 'timestamp': '2025-09-10 02:45:22.893240', 'step': 8954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:22.952702', 'step': 8954, 'epoch': 2} {'type': 'loss', 'content': 0.13340923190116882, 'timestamp': '2025-09-10 02:45:22.955064', 'step': 8955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:23.009159', 'step': 8955, 'epoch': 2} {'type': 'loss', 'content': 0.27536553144454956, 'timestamp': '2025-09-10 02:45:23.015337', 'step': 8956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:23.069086', 'step': 8956, 'epoch': 2} {'type': 'loss', 'content': 0.15619517862796783, 'timestamp': '2025-09-10 02:45:23.071406', 'step': 8957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:23.125258', 'step': 8957, 'epoch': 2} {'type': 'loss', 'content': 0.10023234784603119, 'timestamp': '2025-09-10 02:45:23.127308', 'step': 8958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:45:23.180369', 'step': 8958, 'epoch': 2} {'type': 'loss', 'content': 0.09340129792690277, 'timestamp': '2025-09-10 02:45:23.182405', 'step': 8959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:23.235695', 'step': 8959, 'epoch': 2} {'type': 'loss', 'content': 0.1292247623205185, 'timestamp': '2025-09-10 02:45:23.241601', 'step': 8960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:23.295603', 'step': 8960, 'epoch': 2} {'type': 'loss', 'content': 0.19500413537025452, 'timestamp': '2025-09-10 02:45:23.297905', 'step': 8961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:23.351485', 'step': 8961, 'epoch': 2} {'type': 'loss', 'content': 0.18536809086799622, 'timestamp': '2025-09-10 02:45:23.353535', 'step': 8962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:23.408993', 'step': 8962, 'epoch': 2} {'type': 'loss', 'content': 0.163147434592247, 'timestamp': '2025-09-10 02:45:23.411468', 'step': 8963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:23.465604', 'step': 8963, 'epoch': 2} {'type': 'loss', 'content': 0.08181323856115341, 'timestamp': '2025-09-10 02:45:23.471703', 'step': 8964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:23.525653', 'step': 8964, 'epoch': 2} {'type': 'loss', 'content': 0.14530570805072784, 'timestamp': '2025-09-10 02:45:23.528074', 'step': 8965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:23.581375', 'step': 8965, 'epoch': 2} {'type': 'loss', 'content': 0.13571828603744507, 'timestamp': '2025-09-10 02:45:23.583812', 'step': 8966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:23.636677', 'step': 8966, 'epoch': 2} {'type': 'loss', 'content': 0.08367471396923065, 'timestamp': '2025-09-10 02:45:23.638744', 'step': 8967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:23.691876', 'step': 8967, 'epoch': 2} {'type': 'loss', 'content': 0.1545151174068451, 'timestamp': '2025-09-10 02:45:23.697587', 'step': 8968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:23.751425', 'step': 8968, 'epoch': 2} {'type': 'loss', 'content': 0.1329444944858551, 'timestamp': '2025-09-10 02:45:23.753683', 'step': 8969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:23.807087', 'step': 8969, 'epoch': 2} {'type': 'loss', 'content': 0.1136932298541069, 'timestamp': '2025-09-10 02:45:23.809185', 'step': 8970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:23.862978', 'step': 8970, 'epoch': 2} {'type': 'loss', 'content': 0.0692819282412529, 'timestamp': '2025-09-10 02:45:23.865129', 'step': 8971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:23.919069', 'step': 8971, 'epoch': 2} {'type': 'loss', 'content': 0.13240185379981995, 'timestamp': '2025-09-10 02:45:23.925016', 'step': 8972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:23.977986', 'step': 8972, 'epoch': 2} {'type': 'loss', 'content': 0.13585108518600464, 'timestamp': '2025-09-10 02:45:23.980193', 'step': 8973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:24.032923', 'step': 8973, 'epoch': 2} {'type': 'loss', 'content': 0.28468772768974304, 'timestamp': '2025-09-10 02:45:24.035394', 'step': 8974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:45:24.089347', 'step': 8974, 'epoch': 2} {'type': 'loss', 'content': 0.1404530555009842, 'timestamp': '2025-09-10 02:45:24.091397', 'step': 8975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:24.144447', 'step': 8975, 'epoch': 2} {'type': 'loss', 'content': 0.19182822108268738, 'timestamp': '2025-09-10 02:45:24.150203', 'step': 8976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:24.202861', 'step': 8976, 'epoch': 2} {'type': 'loss', 'content': 0.09901079535484314, 'timestamp': '2025-09-10 02:45:24.204888', 'step': 8977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:24.258419', 'step': 8977, 'epoch': 2} {'type': 'loss', 'content': 0.1329299658536911, 'timestamp': '2025-09-10 02:45:24.260598', 'step': 8978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:24.313888', 'step': 8978, 'epoch': 2} {'type': 'loss', 'content': 0.25769275426864624, 'timestamp': '2025-09-10 02:45:24.316113', 'step': 8979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:24.369957', 'step': 8979, 'epoch': 2} {'type': 'loss', 'content': 0.07294107973575592, 'timestamp': '2025-09-10 02:45:24.375986', 'step': 8980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:24.429338', 'step': 8980, 'epoch': 2} {'type': 'loss', 'content': 0.03853956609964371, 'timestamp': '2025-09-10 02:45:24.431797', 'step': 8981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:24.486835', 'step': 8981, 'epoch': 2} {'type': 'loss', 'content': 0.1620403528213501, 'timestamp': '2025-09-10 02:45:24.488895', 'step': 8982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:24.541997', 'step': 8982, 'epoch': 2} {'type': 'loss', 'content': 0.12888237833976746, 'timestamp': '2025-09-10 02:45:24.544114', 'step': 8983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:24.597425', 'step': 8983, 'epoch': 2} {'type': 'loss', 'content': 0.15032655000686646, 'timestamp': '2025-09-10 02:45:24.603667', 'step': 8984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:24.656650', 'step': 8984, 'epoch': 2} {'type': 'loss', 'content': 0.1595277637243271, 'timestamp': '2025-09-10 02:45:24.658988', 'step': 8985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:24.712134', 'step': 8985, 'epoch': 2} {'type': 'loss', 'content': 0.17264266312122345, 'timestamp': '2025-09-10 02:45:24.714400', 'step': 8986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:24.768266', 'step': 8986, 'epoch': 2} {'type': 'loss', 'content': 0.1877458095550537, 'timestamp': '2025-09-10 02:45:24.770474', 'step': 8987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:24.823932', 'step': 8987, 'epoch': 2} {'type': 'loss', 'content': 0.10537100583314896, 'timestamp': '2025-09-10 02:45:24.829940', 'step': 8988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:24.882957', 'step': 8988, 'epoch': 2} {'type': 'loss', 'content': 0.11825291067361832, 'timestamp': '2025-09-10 02:45:24.885212', 'step': 8989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:24.938665', 'step': 8989, 'epoch': 2} {'type': 'loss', 'content': 0.08366123586893082, 'timestamp': '2025-09-10 02:45:24.941051', 'step': 8990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:25.000424', 'step': 8990, 'epoch': 2} {'type': 'loss', 'content': 0.10429840534925461, 'timestamp': '2025-09-10 02:45:25.002768', 'step': 8991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:25.056758', 'step': 8991, 'epoch': 2} {'type': 'loss', 'content': 0.09880026429891586, 'timestamp': '2025-09-10 02:45:25.062554', 'step': 8992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:25.115712', 'step': 8992, 'epoch': 2} {'type': 'loss', 'content': 0.12515880167484283, 'timestamp': '2025-09-10 02:45:25.118184', 'step': 8993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:25.171846', 'step': 8993, 'epoch': 2} {'type': 'loss', 'content': 0.09494676440954208, 'timestamp': '2025-09-10 02:45:25.174423', 'step': 8994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:25.227666', 'step': 8994, 'epoch': 2} {'type': 'loss', 'content': 0.14224383234977722, 'timestamp': '2025-09-10 02:45:25.230092', 'step': 8995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:25.283598', 'step': 8995, 'epoch': 2} {'type': 'loss', 'content': 0.06454135477542877, 'timestamp': '2025-09-10 02:45:25.289421', 'step': 8996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:25.342058', 'step': 8996, 'epoch': 2} {'type': 'loss', 'content': 0.09990988671779633, 'timestamp': '2025-09-10 02:45:25.344344', 'step': 8997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:25.397771', 'step': 8997, 'epoch': 2} {'type': 'loss', 'content': 0.26725828647613525, 'timestamp': '2025-09-10 02:45:25.400680', 'step': 8998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:25.454902', 'step': 8998, 'epoch': 2} {'type': 'loss', 'content': 0.13908903300762177, 'timestamp': '2025-09-10 02:45:25.457300', 'step': 8999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:25.511629', 'step': 8999, 'epoch': 2} {'type': 'loss', 'content': 0.1283857822418213, 'timestamp': '2025-09-10 02:45:25.517575', 'step': 9000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 9000', 'timestamp': '2025-09-10 02:45:26.153650', 'step': 9000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:26.213048', 'step': 9000, 'epoch': 2} {'type': 'loss', 'content': 0.18678249418735504, 'timestamp': '2025-09-10 02:45:26.215308', 'step': 9001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:26.270761', 'step': 9001, 'epoch': 2} {'type': 'loss', 'content': 0.1417381316423416, 'timestamp': '2025-09-10 02:45:26.273000', 'step': 9002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:26.327051', 'step': 9002, 'epoch': 2} {'type': 'loss', 'content': 0.07578767836093903, 'timestamp': '2025-09-10 02:45:26.329118', 'step': 9003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:26.382664', 'step': 9003, 'epoch': 2} {'type': 'loss', 'content': 0.08643017709255219, 'timestamp': '2025-09-10 02:45:26.388878', 'step': 9004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:26.441384', 'step': 9004, 'epoch': 2} {'type': 'loss', 'content': 0.2516026794910431, 'timestamp': '2025-09-10 02:45:26.443502', 'step': 9005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:26.496653', 'step': 9005, 'epoch': 2} {'type': 'loss', 'content': 0.067627914249897, 'timestamp': '2025-09-10 02:45:26.498953', 'step': 9006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:26.552996', 'step': 9006, 'epoch': 2} {'type': 'loss', 'content': 0.0735350102186203, 'timestamp': '2025-09-10 02:45:26.555309', 'step': 9007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:26.613428', 'step': 9007, 'epoch': 2} {'type': 'loss', 'content': 0.1459103226661682, 'timestamp': '2025-09-10 02:45:26.619835', 'step': 9008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:26.673942', 'step': 9008, 'epoch': 2} {'type': 'loss', 'content': 0.1509959101676941, 'timestamp': '2025-09-10 02:45:26.676414', 'step': 9009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:26.729825', 'step': 9009, 'epoch': 2} {'type': 'loss', 'content': 0.09269203990697861, 'timestamp': '2025-09-10 02:45:26.732078', 'step': 9010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:26.787765', 'step': 9010, 'epoch': 2} {'type': 'loss', 'content': 0.09953653812408447, 'timestamp': '2025-09-10 02:45:26.790082', 'step': 9011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:26.843324', 'step': 9011, 'epoch': 2} {'type': 'loss', 'content': 0.0512370839715004, 'timestamp': '2025-09-10 02:45:26.849429', 'step': 9012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:26.902114', 'step': 9012, 'epoch': 2} {'type': 'loss', 'content': 0.14661452174186707, 'timestamp': '2025-09-10 02:45:26.904336', 'step': 9013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:26.958321', 'step': 9013, 'epoch': 2} {'type': 'loss', 'content': 0.15453177690505981, 'timestamp': '2025-09-10 02:45:26.960580', 'step': 9014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:27.014267', 'step': 9014, 'epoch': 2} {'type': 'loss', 'content': 0.20751823484897614, 'timestamp': '2025-09-10 02:45:27.016526', 'step': 9015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:27.069977', 'step': 9015, 'epoch': 2} {'type': 'loss', 'content': 0.14864115417003632, 'timestamp': '2025-09-10 02:45:27.075882', 'step': 9016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:27.131047', 'step': 9016, 'epoch': 2} {'type': 'loss', 'content': 0.13601616024971008, 'timestamp': '2025-09-10 02:45:27.133164', 'step': 9017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:27.187000', 'step': 9017, 'epoch': 2} {'type': 'loss', 'content': 0.06737647205591202, 'timestamp': '2025-09-10 02:45:27.189312', 'step': 9018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:27.242995', 'step': 9018, 'epoch': 2} {'type': 'loss', 'content': 0.125531405210495, 'timestamp': '2025-09-10 02:45:27.245401', 'step': 9019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:27.306868', 'step': 9019, 'epoch': 2} {'type': 'loss', 'content': 0.1824677288532257, 'timestamp': '2025-09-10 02:45:27.312867', 'step': 9020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:27.373850', 'step': 9020, 'epoch': 2} {'type': 'loss', 'content': 0.11563701182603836, 'timestamp': '2025-09-10 02:45:27.376000', 'step': 9021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:27.431647', 'step': 9021, 'epoch': 2} {'type': 'loss', 'content': 0.09905922412872314, 'timestamp': '2025-09-10 02:45:27.434124', 'step': 9022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:27.488245', 'step': 9022, 'epoch': 2} {'type': 'loss', 'content': 0.12654748558998108, 'timestamp': '2025-09-10 02:45:27.490720', 'step': 9023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:27.544351', 'step': 9023, 'epoch': 2} {'type': 'loss', 'content': 0.09543125331401825, 'timestamp': '2025-09-10 02:45:27.550316', 'step': 9024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:27.604283', 'step': 9024, 'epoch': 2} {'type': 'loss', 'content': 0.11007045209407806, 'timestamp': '2025-09-10 02:45:27.606295', 'step': 9025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:27.659876', 'step': 9025, 'epoch': 2} {'type': 'loss', 'content': 0.1645963490009308, 'timestamp': '2025-09-10 02:45:27.662149', 'step': 9026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:27.718931', 'step': 9026, 'epoch': 2} {'type': 'loss', 'content': 0.07697609812021255, 'timestamp': '2025-09-10 02:45:27.721231', 'step': 9027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:27.774345', 'step': 9027, 'epoch': 2} {'type': 'loss', 'content': 0.14627854526042938, 'timestamp': '2025-09-10 02:45:27.780392', 'step': 9028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:27.833208', 'step': 9028, 'epoch': 2} {'type': 'loss', 'content': 0.1468212902545929, 'timestamp': '2025-09-10 02:45:27.835294', 'step': 9029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:27.888290', 'step': 9029, 'epoch': 2} {'type': 'loss', 'content': 0.15418286621570587, 'timestamp': '2025-09-10 02:45:27.890534', 'step': 9030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:27.943271', 'step': 9030, 'epoch': 2} {'type': 'loss', 'content': 0.10972413420677185, 'timestamp': '2025-09-10 02:45:27.945648', 'step': 9031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:27.998630', 'step': 9031, 'epoch': 2} {'type': 'loss', 'content': 0.19260793924331665, 'timestamp': '2025-09-10 02:45:28.004366', 'step': 9032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:28.057231', 'step': 9032, 'epoch': 2} {'type': 'loss', 'content': 0.12218748778104782, 'timestamp': '2025-09-10 02:45:28.059477', 'step': 9033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:28.112522', 'step': 9033, 'epoch': 2} {'type': 'loss', 'content': 0.09365449100732803, 'timestamp': '2025-09-10 02:45:28.114767', 'step': 9034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:28.168985', 'step': 9034, 'epoch': 2} {'type': 'loss', 'content': 0.16077212989330292, 'timestamp': '2025-09-10 02:45:28.171250', 'step': 9035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:28.224977', 'step': 9035, 'epoch': 2} {'type': 'loss', 'content': 0.16780182719230652, 'timestamp': '2025-09-10 02:45:28.231002', 'step': 9036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:28.284222', 'step': 9036, 'epoch': 2} {'type': 'loss', 'content': 0.14666934311389923, 'timestamp': '2025-09-10 02:45:28.286641', 'step': 9037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:28.339441', 'step': 9037, 'epoch': 2} {'type': 'loss', 'content': 0.12831436097621918, 'timestamp': '2025-09-10 02:45:28.341811', 'step': 9038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:28.394978', 'step': 9038, 'epoch': 2} {'type': 'loss', 'content': 0.12281721830368042, 'timestamp': '2025-09-10 02:45:28.397308', 'step': 9039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:28.449975', 'step': 9039, 'epoch': 2} {'type': 'loss', 'content': 0.1310177445411682, 'timestamp': '2025-09-10 02:45:28.455720', 'step': 9040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:45:28.508324', 'step': 9040, 'epoch': 2} {'type': 'loss', 'content': 0.11224404722452164, 'timestamp': '2025-09-10 02:45:28.510326', 'step': 9041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:28.563230', 'step': 9041, 'epoch': 2} {'type': 'loss', 'content': 0.1864050179719925, 'timestamp': '2025-09-10 02:45:28.565222', 'step': 9042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:28.617861', 'step': 9042, 'epoch': 2} {'type': 'loss', 'content': 0.17267823219299316, 'timestamp': '2025-09-10 02:45:28.620181', 'step': 9043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:28.672756', 'step': 9043, 'epoch': 2} {'type': 'loss', 'content': 0.11487017571926117, 'timestamp': '2025-09-10 02:45:28.678707', 'step': 9044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:28.732186', 'step': 9044, 'epoch': 2} {'type': 'loss', 'content': 0.14342841506004333, 'timestamp': '2025-09-10 02:45:28.734472', 'step': 9045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:28.788209', 'step': 9045, 'epoch': 2} {'type': 'loss', 'content': 0.11988187581300735, 'timestamp': '2025-09-10 02:45:28.790289', 'step': 9046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:28.844073', 'step': 9046, 'epoch': 2} {'type': 'loss', 'content': 0.18768952786922455, 'timestamp': '2025-09-10 02:45:28.846077', 'step': 9047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:28.901238', 'step': 9047, 'epoch': 2} {'type': 'loss', 'content': 0.1034827008843422, 'timestamp': '2025-09-10 02:45:28.907181', 'step': 9048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:28.960248', 'step': 9048, 'epoch': 2} {'type': 'loss', 'content': 0.08181721717119217, 'timestamp': '2025-09-10 02:45:28.962621', 'step': 9049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:29.016416', 'step': 9049, 'epoch': 2} {'type': 'loss', 'content': 0.1807805597782135, 'timestamp': '2025-09-10 02:45:29.019181', 'step': 9050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:29.072279', 'step': 9050, 'epoch': 2} {'type': 'loss', 'content': 0.12370581179857254, 'timestamp': '2025-09-10 02:45:29.074492', 'step': 9051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:29.127493', 'step': 9051, 'epoch': 2} {'type': 'loss', 'content': 0.206487238407135, 'timestamp': '2025-09-10 02:45:29.133281', 'step': 9052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:29.185456', 'step': 9052, 'epoch': 2} {'type': 'loss', 'content': 0.12133994698524475, 'timestamp': '2025-09-10 02:45:29.187708', 'step': 9053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:29.240820', 'step': 9053, 'epoch': 2} {'type': 'loss', 'content': 0.20595665276050568, 'timestamp': '2025-09-10 02:45:29.242947', 'step': 9054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:29.297617', 'step': 9054, 'epoch': 2} {'type': 'loss', 'content': 0.11496513336896896, 'timestamp': '2025-09-10 02:45:29.299805', 'step': 9055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:29.353310', 'step': 9055, 'epoch': 2} {'type': 'loss', 'content': 0.16900546848773956, 'timestamp': '2025-09-10 02:45:29.359349', 'step': 9056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:29.411703', 'step': 9056, 'epoch': 2} {'type': 'loss', 'content': 0.1630396991968155, 'timestamp': '2025-09-10 02:45:29.413691', 'step': 9057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:29.466330', 'step': 9057, 'epoch': 2} {'type': 'loss', 'content': 0.12921200692653656, 'timestamp': '2025-09-10 02:45:29.468650', 'step': 9058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:29.522723', 'step': 9058, 'epoch': 2} {'type': 'loss', 'content': 0.1755441576242447, 'timestamp': '2025-09-10 02:45:29.525438', 'step': 9059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:29.578927', 'step': 9059, 'epoch': 2} {'type': 'loss', 'content': 0.14688615500926971, 'timestamp': '2025-09-10 02:45:29.584891', 'step': 9060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:29.645944', 'step': 9060, 'epoch': 2} {'type': 'loss', 'content': 0.11455793678760529, 'timestamp': '2025-09-10 02:45:29.648035', 'step': 9061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:29.701130', 'step': 9061, 'epoch': 2} {'type': 'loss', 'content': 0.15632812678813934, 'timestamp': '2025-09-10 02:45:29.705399', 'step': 9062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:29.761087', 'step': 9062, 'epoch': 2} {'type': 'loss', 'content': 0.11213617026805878, 'timestamp': '2025-09-10 02:45:29.764077', 'step': 9063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:29.824172', 'step': 9063, 'epoch': 2} {'type': 'loss', 'content': 0.15583506226539612, 'timestamp': '2025-09-10 02:45:29.832021', 'step': 9064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:29.886409', 'step': 9064, 'epoch': 2} {'type': 'loss', 'content': 0.10075616091489792, 'timestamp': '2025-09-10 02:45:29.888772', 'step': 9065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:29.942223', 'step': 9065, 'epoch': 2} {'type': 'loss', 'content': 0.14072826504707336, 'timestamp': '2025-09-10 02:45:29.944565', 'step': 9066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:30.003260', 'step': 9066, 'epoch': 2} {'type': 'loss', 'content': 0.07944516092538834, 'timestamp': '2025-09-10 02:45:30.005794', 'step': 9067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:30.075505', 'step': 9067, 'epoch': 2} {'type': 'loss', 'content': 0.15930961072444916, 'timestamp': '2025-09-10 02:45:30.081701', 'step': 9068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:30.141046', 'step': 9068, 'epoch': 2} {'type': 'loss', 'content': 0.13613784313201904, 'timestamp': '2025-09-10 02:45:30.143197', 'step': 9069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:30.197001', 'step': 9069, 'epoch': 2} {'type': 'loss', 'content': 0.12058878690004349, 'timestamp': '2025-09-10 02:45:30.199062', 'step': 9070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:30.269119', 'step': 9070, 'epoch': 2} {'type': 'loss', 'content': 0.1742534637451172, 'timestamp': '2025-09-10 02:45:30.271322', 'step': 9071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:30.335818', 'step': 9071, 'epoch': 2} {'type': 'loss', 'content': 0.10653016716241837, 'timestamp': '2025-09-10 02:45:30.341892', 'step': 9072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:30.405391', 'step': 9072, 'epoch': 2} {'type': 'loss', 'content': 0.09555471688508987, 'timestamp': '2025-09-10 02:45:30.407546', 'step': 9073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:30.461139', 'step': 9073, 'epoch': 2} {'type': 'loss', 'content': 0.09977215528488159, 'timestamp': '2025-09-10 02:45:30.463384', 'step': 9074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:30.516777', 'step': 9074, 'epoch': 2} {'type': 'loss', 'content': 0.29178586602211, 'timestamp': '2025-09-10 02:45:30.522992', 'step': 9075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:30.583340', 'step': 9075, 'epoch': 2} {'type': 'loss', 'content': 0.10318963974714279, 'timestamp': '2025-09-10 02:45:30.589131', 'step': 9076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:30.642791', 'step': 9076, 'epoch': 2} {'type': 'loss', 'content': 0.14245033264160156, 'timestamp': '2025-09-10 02:45:30.644930', 'step': 9077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:30.697441', 'step': 9077, 'epoch': 2} {'type': 'loss', 'content': 0.17555265128612518, 'timestamp': '2025-09-10 02:45:30.699499', 'step': 9078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:30.752407', 'step': 9078, 'epoch': 2} {'type': 'loss', 'content': 0.09345266222953796, 'timestamp': '2025-09-10 02:45:30.754808', 'step': 9079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:30.810187', 'step': 9079, 'epoch': 2} {'type': 'loss', 'content': 0.12171552330255508, 'timestamp': '2025-09-10 02:45:30.818695', 'step': 9080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:30.872661', 'step': 9080, 'epoch': 2} {'type': 'loss', 'content': 0.1623128205537796, 'timestamp': '2025-09-10 02:45:30.876150', 'step': 9081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:30.933612', 'step': 9081, 'epoch': 2} {'type': 'loss', 'content': 0.13361108303070068, 'timestamp': '2025-09-10 02:45:30.935896', 'step': 9082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:30.990805', 'step': 9082, 'epoch': 2} {'type': 'loss', 'content': 0.15639738738536835, 'timestamp': '2025-09-10 02:45:30.993304', 'step': 9083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:31.046635', 'step': 9083, 'epoch': 2} {'type': 'loss', 'content': 0.24384450912475586, 'timestamp': '2025-09-10 02:45:31.052530', 'step': 9084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:31.113115', 'step': 9084, 'epoch': 2} {'type': 'loss', 'content': 0.10909122228622437, 'timestamp': '2025-09-10 02:45:31.115455', 'step': 9085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:31.168123', 'step': 9085, 'epoch': 2} {'type': 'loss', 'content': 0.09317005425691605, 'timestamp': '2025-09-10 02:45:31.170303', 'step': 9086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:31.223506', 'step': 9086, 'epoch': 2} {'type': 'loss', 'content': 0.10783733427524567, 'timestamp': '2025-09-10 02:45:31.225785', 'step': 9087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:31.281424', 'step': 9087, 'epoch': 2} {'type': 'loss', 'content': 0.10249663889408112, 'timestamp': '2025-09-10 02:45:31.287241', 'step': 9088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:31.340019', 'step': 9088, 'epoch': 2} {'type': 'loss', 'content': 0.10609101504087448, 'timestamp': '2025-09-10 02:45:31.342349', 'step': 9089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:45:31.395792', 'step': 9089, 'epoch': 2} {'type': 'loss', 'content': 0.13539092242717743, 'timestamp': '2025-09-10 02:45:31.397868', 'step': 9090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:31.461261', 'step': 9090, 'epoch': 2} {'type': 'loss', 'content': 0.07547665387392044, 'timestamp': '2025-09-10 02:45:31.465687', 'step': 9091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:31.524153', 'step': 9091, 'epoch': 2} {'type': 'loss', 'content': 0.13812409341335297, 'timestamp': '2025-09-10 02:45:31.532785', 'step': 9092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:31.589178', 'step': 9092, 'epoch': 2} {'type': 'loss', 'content': 0.13078372180461884, 'timestamp': '2025-09-10 02:45:31.591541', 'step': 9093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:31.645604', 'step': 9093, 'epoch': 2} {'type': 'loss', 'content': 0.15260271728038788, 'timestamp': '2025-09-10 02:45:31.647947', 'step': 9094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:31.703108', 'step': 9094, 'epoch': 2} {'type': 'loss', 'content': 0.11991924047470093, 'timestamp': '2025-09-10 02:45:31.707495', 'step': 9095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:31.763655', 'step': 9095, 'epoch': 2} {'type': 'loss', 'content': 0.1174607053399086, 'timestamp': '2025-09-10 02:45:31.769487', 'step': 9096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:31.822220', 'step': 9096, 'epoch': 2} {'type': 'loss', 'content': 0.2008770853281021, 'timestamp': '2025-09-10 02:45:31.825822', 'step': 9097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:31.879526', 'step': 9097, 'epoch': 2} {'type': 'loss', 'content': 0.2541868984699249, 'timestamp': '2025-09-10 02:45:31.885475', 'step': 9098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:31.949515', 'step': 9098, 'epoch': 2} {'type': 'loss', 'content': 0.18761734664440155, 'timestamp': '2025-09-10 02:45:31.951857', 'step': 9099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:32.006899', 'step': 9099, 'epoch': 2} {'type': 'loss', 'content': 0.1849948912858963, 'timestamp': '2025-09-10 02:45:32.015753', 'step': 9100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:32.069070', 'step': 9100, 'epoch': 2} {'type': 'loss', 'content': 0.17474031448364258, 'timestamp': '2025-09-10 02:45:32.071450', 'step': 9101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:32.125641', 'step': 9101, 'epoch': 2} {'type': 'loss', 'content': 0.0962350070476532, 'timestamp': '2025-09-10 02:45:32.128790', 'step': 9102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:32.184845', 'step': 9102, 'epoch': 2} {'type': 'loss', 'content': 0.168419748544693, 'timestamp': '2025-09-10 02:45:32.187018', 'step': 9103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:32.239894', 'step': 9103, 'epoch': 2} {'type': 'loss', 'content': 0.21749290823936462, 'timestamp': '2025-09-10 02:45:32.245772', 'step': 9104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:32.297838', 'step': 9104, 'epoch': 2} {'type': 'loss', 'content': 0.09910377115011215, 'timestamp': '2025-09-10 02:45:32.303787', 'step': 9105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:32.362553', 'step': 9105, 'epoch': 2} {'type': 'loss', 'content': 0.0840277224779129, 'timestamp': '2025-09-10 02:45:32.367985', 'step': 9106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:32.421521', 'step': 9106, 'epoch': 2} {'type': 'loss', 'content': 0.0771927535533905, 'timestamp': '2025-09-10 02:45:32.423603', 'step': 9107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:32.476552', 'step': 9107, 'epoch': 2} {'type': 'loss', 'content': 0.10713104903697968, 'timestamp': '2025-09-10 02:45:32.482486', 'step': 9108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:32.534502', 'step': 9108, 'epoch': 2} {'type': 'loss', 'content': 0.12194929271936417, 'timestamp': '2025-09-10 02:45:32.536672', 'step': 9109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:32.589882', 'step': 9109, 'epoch': 2} {'type': 'loss', 'content': 0.14547064900398254, 'timestamp': '2025-09-10 02:45:32.592356', 'step': 9110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:32.649803', 'step': 9110, 'epoch': 2} {'type': 'loss', 'content': 0.12977811694145203, 'timestamp': '2025-09-10 02:45:32.651961', 'step': 9111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:32.704753', 'step': 9111, 'epoch': 2} {'type': 'loss', 'content': 0.06630425900220871, 'timestamp': '2025-09-10 02:45:32.715125', 'step': 9112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:32.767467', 'step': 9112, 'epoch': 2} {'type': 'loss', 'content': 0.15186713635921478, 'timestamp': '2025-09-10 02:45:32.769748', 'step': 9113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:32.822623', 'step': 9113, 'epoch': 2} {'type': 'loss', 'content': 0.06807101517915726, 'timestamp': '2025-09-10 02:45:32.827795', 'step': 9114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:32.882311', 'step': 9114, 'epoch': 2} {'type': 'loss', 'content': 0.19598037004470825, 'timestamp': '2025-09-10 02:45:32.884384', 'step': 9115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:32.936614', 'step': 9115, 'epoch': 2} {'type': 'loss', 'content': 0.17227061092853546, 'timestamp': '2025-09-10 02:45:32.942167', 'step': 9116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:32.995411', 'step': 9116, 'epoch': 2} {'type': 'loss', 'content': 0.06862371414899826, 'timestamp': '2025-09-10 02:45:32.999920', 'step': 9117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:33.052787', 'step': 9117, 'epoch': 2} {'type': 'loss', 'content': 0.09113737940788269, 'timestamp': '2025-09-10 02:45:33.054928', 'step': 9118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:33.116357', 'step': 9118, 'epoch': 2} {'type': 'loss', 'content': 0.12379956990480423, 'timestamp': '2025-09-10 02:45:33.118500', 'step': 9119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:33.171431', 'step': 9119, 'epoch': 2} {'type': 'loss', 'content': 0.12245749682188034, 'timestamp': '2025-09-10 02:45:33.177166', 'step': 9120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:45:33.229970', 'step': 9120, 'epoch': 2} {'type': 'loss', 'content': 0.07218121737241745, 'timestamp': '2025-09-10 02:45:33.232138', 'step': 9121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:33.285743', 'step': 9121, 'epoch': 2} {'type': 'loss', 'content': 0.1298273205757141, 'timestamp': '2025-09-10 02:45:33.288023', 'step': 9122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:33.340685', 'step': 9122, 'epoch': 2} {'type': 'loss', 'content': 0.09376362711191177, 'timestamp': '2025-09-10 02:45:33.343030', 'step': 9123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:33.396892', 'step': 9123, 'epoch': 2} {'type': 'loss', 'content': 0.11739595234394073, 'timestamp': '2025-09-10 02:45:33.403044', 'step': 9124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:33.456672', 'step': 9124, 'epoch': 2} {'type': 'loss', 'content': 0.10147152096033096, 'timestamp': '2025-09-10 02:45:33.458978', 'step': 9125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:33.512706', 'step': 9125, 'epoch': 2} {'type': 'loss', 'content': 0.1021571233868599, 'timestamp': '2025-09-10 02:45:33.514855', 'step': 9126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:33.568266', 'step': 9126, 'epoch': 2} {'type': 'loss', 'content': 0.1450095921754837, 'timestamp': '2025-09-10 02:45:33.570470', 'step': 9127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:33.625184', 'step': 9127, 'epoch': 2} {'type': 'loss', 'content': 0.15189243853092194, 'timestamp': '2025-09-10 02:45:33.631232', 'step': 9128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:33.686032', 'step': 9128, 'epoch': 2} {'type': 'loss', 'content': 0.0740402564406395, 'timestamp': '2025-09-10 02:45:33.688060', 'step': 9129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:33.748991', 'step': 9129, 'epoch': 2} {'type': 'loss', 'content': 0.12061096727848053, 'timestamp': '2025-09-10 02:45:33.754642', 'step': 9130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:33.808217', 'step': 9130, 'epoch': 2} {'type': 'loss', 'content': 0.1397971659898758, 'timestamp': '2025-09-10 02:45:33.810487', 'step': 9131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:33.864559', 'step': 9131, 'epoch': 2} {'type': 'loss', 'content': 0.1653670221567154, 'timestamp': '2025-09-10 02:45:33.870802', 'step': 9132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:33.923792', 'step': 9132, 'epoch': 2} {'type': 'loss', 'content': 0.10166119039058685, 'timestamp': '2025-09-10 02:45:33.926805', 'step': 9133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:33.982112', 'step': 9133, 'epoch': 2} {'type': 'loss', 'content': 0.11303827911615372, 'timestamp': '2025-09-10 02:45:33.984410', 'step': 9134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:34.038702', 'step': 9134, 'epoch': 2} {'type': 'loss', 'content': 0.20075266063213348, 'timestamp': '2025-09-10 02:45:34.041367', 'step': 9135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:34.095294', 'step': 9135, 'epoch': 2} {'type': 'loss', 'content': 0.12551087141036987, 'timestamp': '2025-09-10 02:45:34.101126', 'step': 9136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:34.153350', 'step': 9136, 'epoch': 2} {'type': 'loss', 'content': 0.10089904814958572, 'timestamp': '2025-09-10 02:45:34.156603', 'step': 9137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:34.209988', 'step': 9137, 'epoch': 2} {'type': 'loss', 'content': 0.09197698533535004, 'timestamp': '2025-09-10 02:45:34.213502', 'step': 9138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:34.267542', 'step': 9138, 'epoch': 2} {'type': 'loss', 'content': 0.10591184347867966, 'timestamp': '2025-09-10 02:45:34.269905', 'step': 9139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:34.323356', 'step': 9139, 'epoch': 2} {'type': 'loss', 'content': 0.16959825158119202, 'timestamp': '2025-09-10 02:45:34.329257', 'step': 9140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:34.382494', 'step': 9140, 'epoch': 2} {'type': 'loss', 'content': 0.09993152320384979, 'timestamp': '2025-09-10 02:45:34.384625', 'step': 9141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:34.437314', 'step': 9141, 'epoch': 2} {'type': 'loss', 'content': 0.06706330925226212, 'timestamp': '2025-09-10 02:45:34.439630', 'step': 9142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:45:34.492425', 'step': 9142, 'epoch': 2} {'type': 'loss', 'content': 0.07765465974807739, 'timestamp': '2025-09-10 02:45:34.496103', 'step': 9143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:34.550610', 'step': 9143, 'epoch': 2} {'type': 'loss', 'content': 0.1385851949453354, 'timestamp': '2025-09-10 02:45:34.557188', 'step': 9144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:34.613240', 'step': 9144, 'epoch': 2} {'type': 'loss', 'content': 0.22969330847263336, 'timestamp': '2025-09-10 02:45:34.615352', 'step': 9145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:34.668911', 'step': 9145, 'epoch': 2} {'type': 'loss', 'content': 0.0910065546631813, 'timestamp': '2025-09-10 02:45:34.672142', 'step': 9146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:34.728763', 'step': 9146, 'epoch': 2} {'type': 'loss', 'content': 0.226629838347435, 'timestamp': '2025-09-10 02:45:34.731161', 'step': 9147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:34.785240', 'step': 9147, 'epoch': 2} {'type': 'loss', 'content': 0.056649401783943176, 'timestamp': '2025-09-10 02:45:34.791797', 'step': 9148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:34.846179', 'step': 9148, 'epoch': 2} {'type': 'loss', 'content': 0.08542059361934662, 'timestamp': '2025-09-10 02:45:34.848430', 'step': 9149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:34.902613', 'step': 9149, 'epoch': 2} {'type': 'loss', 'content': 0.18398717045783997, 'timestamp': '2025-09-10 02:45:34.904685', 'step': 9150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:45:34.958944', 'step': 9150, 'epoch': 2} {'type': 'loss', 'content': 0.21434669196605682, 'timestamp': '2025-09-10 02:45:34.961031', 'step': 9151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:35.014694', 'step': 9151, 'epoch': 2} {'type': 'loss', 'content': 0.08958520740270615, 'timestamp': '2025-09-10 02:45:35.021943', 'step': 9152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:35.077540', 'step': 9152, 'epoch': 2} {'type': 'loss', 'content': 0.18488414585590363, 'timestamp': '2025-09-10 02:45:35.083386', 'step': 9153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:35.139805', 'step': 9153, 'epoch': 2} {'type': 'loss', 'content': 0.10703004151582718, 'timestamp': '2025-09-10 02:45:35.142063', 'step': 9154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:35.195790', 'step': 9154, 'epoch': 2} {'type': 'loss', 'content': 0.11996355652809143, 'timestamp': '2025-09-10 02:45:35.198032', 'step': 9155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:35.251913', 'step': 9155, 'epoch': 2} {'type': 'loss', 'content': 0.07623704522848129, 'timestamp': '2025-09-10 02:45:35.258086', 'step': 9156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:35.310598', 'step': 9156, 'epoch': 2} {'type': 'loss', 'content': 0.14027206599712372, 'timestamp': '2025-09-10 02:45:35.312752', 'step': 9157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:35.365106', 'step': 9157, 'epoch': 2} {'type': 'loss', 'content': 0.12649418413639069, 'timestamp': '2025-09-10 02:45:35.367487', 'step': 9158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:35.420965', 'step': 9158, 'epoch': 2} {'type': 'loss', 'content': 0.0798412561416626, 'timestamp': '2025-09-10 02:45:35.423076', 'step': 9159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:35.480505', 'step': 9159, 'epoch': 2} {'type': 'loss', 'content': 0.20035845041275024, 'timestamp': '2025-09-10 02:45:35.486461', 'step': 9160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:45:35.539524', 'step': 9160, 'epoch': 2} {'type': 'loss', 'content': 0.06309380382299423, 'timestamp': '2025-09-10 02:45:35.541709', 'step': 9161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:35.599290', 'step': 9161, 'epoch': 2} {'type': 'loss', 'content': 0.11172869801521301, 'timestamp': '2025-09-10 02:45:35.601727', 'step': 9162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:35.654878', 'step': 9162, 'epoch': 2} {'type': 'loss', 'content': 0.1062638908624649, 'timestamp': '2025-09-10 02:45:35.657372', 'step': 9163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:35.713004', 'step': 9163, 'epoch': 2} {'type': 'loss', 'content': 0.11675325036048889, 'timestamp': '2025-09-10 02:45:35.721256', 'step': 9164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:35.781094', 'step': 9164, 'epoch': 2} {'type': 'loss', 'content': 0.23556387424468994, 'timestamp': '2025-09-10 02:45:35.783343', 'step': 9165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:35.837737', 'step': 9165, 'epoch': 2} {'type': 'loss', 'content': 0.16406933963298798, 'timestamp': '2025-09-10 02:45:35.839933', 'step': 9166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:35.893706', 'step': 9166, 'epoch': 2} {'type': 'loss', 'content': 0.14996150135993958, 'timestamp': '2025-09-10 02:45:35.896356', 'step': 9167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:35.949630', 'step': 9167, 'epoch': 2} {'type': 'loss', 'content': 0.10798485577106476, 'timestamp': '2025-09-10 02:45:35.957676', 'step': 9168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:36.010756', 'step': 9168, 'epoch': 2} {'type': 'loss', 'content': 0.13531379401683807, 'timestamp': '2025-09-10 02:45:36.013113', 'step': 9169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:36.066152', 'step': 9169, 'epoch': 2} {'type': 'loss', 'content': 0.1497320681810379, 'timestamp': '2025-09-10 02:45:36.068258', 'step': 9170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:36.126398', 'step': 9170, 'epoch': 2} {'type': 'loss', 'content': 0.09613189101219177, 'timestamp': '2025-09-10 02:45:36.128578', 'step': 9171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:36.185106', 'step': 9171, 'epoch': 2} {'type': 'loss', 'content': 0.17562822997570038, 'timestamp': '2025-09-10 02:45:36.191582', 'step': 9172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:36.247995', 'step': 9172, 'epoch': 2} {'type': 'loss', 'content': 0.06773518770933151, 'timestamp': '2025-09-10 02:45:36.250301', 'step': 9173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:36.307891', 'step': 9173, 'epoch': 2} {'type': 'loss', 'content': 0.22508685290813446, 'timestamp': '2025-09-10 02:45:36.311520', 'step': 9174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:36.372021', 'step': 9174, 'epoch': 2} {'type': 'loss', 'content': 0.2099260687828064, 'timestamp': '2025-09-10 02:45:36.374309', 'step': 9175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:36.427103', 'step': 9175, 'epoch': 2} {'type': 'loss', 'content': 0.08139000087976456, 'timestamp': '2025-09-10 02:45:36.433115', 'step': 9176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:36.493531', 'step': 9176, 'epoch': 2} {'type': 'loss', 'content': 0.09139601141214371, 'timestamp': '2025-09-10 02:45:36.495466', 'step': 9177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:36.558785', 'step': 9177, 'epoch': 2} {'type': 'loss', 'content': 0.13431385159492493, 'timestamp': '2025-09-10 02:45:36.560885', 'step': 9178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:36.614956', 'step': 9178, 'epoch': 2} {'type': 'loss', 'content': 0.2911626994609833, 'timestamp': '2025-09-10 02:45:36.617007', 'step': 9179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:36.669775', 'step': 9179, 'epoch': 2} {'type': 'loss', 'content': 0.09253226220607758, 'timestamp': '2025-09-10 02:45:36.675388', 'step': 9180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:36.733557', 'step': 9180, 'epoch': 2} {'type': 'loss', 'content': 0.13463734090328217, 'timestamp': '2025-09-10 02:45:36.735861', 'step': 9181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:36.789248', 'step': 9181, 'epoch': 2} {'type': 'loss', 'content': 0.14807511866092682, 'timestamp': '2025-09-10 02:45:36.792566', 'step': 9182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:36.847675', 'step': 9182, 'epoch': 2} {'type': 'loss', 'content': 0.15440933406352997, 'timestamp': '2025-09-10 02:45:36.851872', 'step': 9183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:36.911542', 'step': 9183, 'epoch': 2} {'type': 'loss', 'content': 0.14355409145355225, 'timestamp': '2025-09-10 02:45:36.917609', 'step': 9184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:36.969713', 'step': 9184, 'epoch': 2} {'type': 'loss', 'content': 0.08293195813894272, 'timestamp': '2025-09-10 02:45:36.971885', 'step': 9185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:37.025771', 'step': 9185, 'epoch': 2} {'type': 'loss', 'content': 0.12044347077608109, 'timestamp': '2025-09-10 02:45:37.027964', 'step': 9186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:37.083844', 'step': 9186, 'epoch': 2} {'type': 'loss', 'content': 0.12234282493591309, 'timestamp': '2025-09-10 02:45:37.088404', 'step': 9187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:37.144263', 'step': 9187, 'epoch': 2} {'type': 'loss', 'content': 0.08886802196502686, 'timestamp': '2025-09-10 02:45:37.150458', 'step': 9188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:37.204849', 'step': 9188, 'epoch': 2} {'type': 'loss', 'content': 0.181834414601326, 'timestamp': '2025-09-10 02:45:37.206993', 'step': 9189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:37.259910', 'step': 9189, 'epoch': 2} {'type': 'loss', 'content': 0.13811282813549042, 'timestamp': '2025-09-10 02:45:37.263414', 'step': 9190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:37.324385', 'step': 9190, 'epoch': 2} {'type': 'loss', 'content': 0.04833567887544632, 'timestamp': '2025-09-10 02:45:37.326352', 'step': 9191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:37.378825', 'step': 9191, 'epoch': 2} {'type': 'loss', 'content': 0.13635903596878052, 'timestamp': '2025-09-10 02:45:37.385172', 'step': 9192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:45:37.437598', 'step': 9192, 'epoch': 2} {'type': 'loss', 'content': 0.1807234287261963, 'timestamp': '2025-09-10 02:45:37.441573', 'step': 9193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:37.496603', 'step': 9193, 'epoch': 2} {'type': 'loss', 'content': 0.15183338522911072, 'timestamp': '2025-09-10 02:45:37.498560', 'step': 9194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:37.551863', 'step': 9194, 'epoch': 2} {'type': 'loss', 'content': 0.1339077353477478, 'timestamp': '2025-09-10 02:45:37.555298', 'step': 9195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:37.609183', 'step': 9195, 'epoch': 2} {'type': 'loss', 'content': 0.1533094346523285, 'timestamp': '2025-09-10 02:45:37.615362', 'step': 9196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:37.667686', 'step': 9196, 'epoch': 2} {'type': 'loss', 'content': 0.09346067905426025, 'timestamp': '2025-09-10 02:45:37.669769', 'step': 9197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:37.735617', 'step': 9197, 'epoch': 2} {'type': 'loss', 'content': 0.15873010456562042, 'timestamp': '2025-09-10 02:45:37.737825', 'step': 9198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:37.790916', 'step': 9198, 'epoch': 2} {'type': 'loss', 'content': 0.1473650485277176, 'timestamp': '2025-09-10 02:45:37.793089', 'step': 9199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:37.851005', 'step': 9199, 'epoch': 2} {'type': 'loss', 'content': 0.1802941858768463, 'timestamp': '2025-09-10 02:45:37.858650', 'step': 9200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:37.911416', 'step': 9200, 'epoch': 2} {'type': 'loss', 'content': 0.13378357887268066, 'timestamp': '2025-09-10 02:45:37.913536', 'step': 9201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:37.965749', 'step': 9201, 'epoch': 2} {'type': 'loss', 'content': 0.11765933781862259, 'timestamp': '2025-09-10 02:45:37.968076', 'step': 9202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:38.021623', 'step': 9202, 'epoch': 2} {'type': 'loss', 'content': 0.10370698571205139, 'timestamp': '2025-09-10 02:45:38.024018', 'step': 9203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:38.077167', 'step': 9203, 'epoch': 2} {'type': 'loss', 'content': 0.09071947634220123, 'timestamp': '2025-09-10 02:45:38.085466', 'step': 9204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:38.143498', 'step': 9204, 'epoch': 2} {'type': 'loss', 'content': 0.16592447459697723, 'timestamp': '2025-09-10 02:45:38.152681', 'step': 9205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:38.213614', 'step': 9205, 'epoch': 2} {'type': 'loss', 'content': 0.12798811495304108, 'timestamp': '2025-09-10 02:45:38.215581', 'step': 9206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:38.268512', 'step': 9206, 'epoch': 2} {'type': 'loss', 'content': 0.08484305441379547, 'timestamp': '2025-09-10 02:45:38.271589', 'step': 9207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:38.330343', 'step': 9207, 'epoch': 2} {'type': 'loss', 'content': 0.1194133460521698, 'timestamp': '2025-09-10 02:45:38.336307', 'step': 9208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:38.393671', 'step': 9208, 'epoch': 2} {'type': 'loss', 'content': 0.10894646495580673, 'timestamp': '2025-09-10 02:45:38.395762', 'step': 9209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:38.448676', 'step': 9209, 'epoch': 2} {'type': 'loss', 'content': 0.11545659601688385, 'timestamp': '2025-09-10 02:45:38.450901', 'step': 9210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:38.504330', 'step': 9210, 'epoch': 2} {'type': 'loss', 'content': 0.1358119696378708, 'timestamp': '2025-09-10 02:45:38.506948', 'step': 9211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:38.561085', 'step': 9211, 'epoch': 2} {'type': 'loss', 'content': 0.18293391168117523, 'timestamp': '2025-09-10 02:45:38.567243', 'step': 9212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:38.624509', 'step': 9212, 'epoch': 2} {'type': 'loss', 'content': 0.15164996683597565, 'timestamp': '2025-09-10 02:45:38.627616', 'step': 9213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:38.679950', 'step': 9213, 'epoch': 2} {'type': 'loss', 'content': 0.16879090666770935, 'timestamp': '2025-09-10 02:45:38.684370', 'step': 9214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:38.737255', 'step': 9214, 'epoch': 2} {'type': 'loss', 'content': 0.06540393084287643, 'timestamp': '2025-09-10 02:45:38.739427', 'step': 9215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:38.792423', 'step': 9215, 'epoch': 2} {'type': 'loss', 'content': 0.08493708074092865, 'timestamp': '2025-09-10 02:45:38.798123', 'step': 9216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:38.850481', 'step': 9216, 'epoch': 2} {'type': 'loss', 'content': 0.10077232122421265, 'timestamp': '2025-09-10 02:45:38.853623', 'step': 9217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:38.907685', 'step': 9217, 'epoch': 2} {'type': 'loss', 'content': 0.12387101352214813, 'timestamp': '2025-09-10 02:45:38.910022', 'step': 9218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:38.963922', 'step': 9218, 'epoch': 2} {'type': 'loss', 'content': 0.10670596361160278, 'timestamp': '2025-09-10 02:45:38.968559', 'step': 9219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:39.023489', 'step': 9219, 'epoch': 2} {'type': 'loss', 'content': 0.11223506927490234, 'timestamp': '2025-09-10 02:45:39.029437', 'step': 9220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:39.089940', 'step': 9220, 'epoch': 2} {'type': 'loss', 'content': 0.09441123902797699, 'timestamp': '2025-09-10 02:45:39.092110', 'step': 9221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:39.147032', 'step': 9221, 'epoch': 2} {'type': 'loss', 'content': 0.12793588638305664, 'timestamp': '2025-09-10 02:45:39.149833', 'step': 9222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:39.203907', 'step': 9222, 'epoch': 2} {'type': 'loss', 'content': 0.11695315688848495, 'timestamp': '2025-09-10 02:45:39.206112', 'step': 9223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:39.260533', 'step': 9223, 'epoch': 2} {'type': 'loss', 'content': 0.17942121624946594, 'timestamp': '2025-09-10 02:45:39.266297', 'step': 9224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:39.320779', 'step': 9224, 'epoch': 2} {'type': 'loss', 'content': 0.09394613653421402, 'timestamp': '2025-09-10 02:45:39.323043', 'step': 9225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:45:39.376455', 'step': 9225, 'epoch': 2} {'type': 'loss', 'content': 0.09058400243520737, 'timestamp': '2025-09-10 02:45:39.378831', 'step': 9226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:39.432898', 'step': 9226, 'epoch': 2} {'type': 'loss', 'content': 0.1021987646818161, 'timestamp': '2025-09-10 02:45:39.435251', 'step': 9227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:39.489886', 'step': 9227, 'epoch': 2} {'type': 'loss', 'content': 0.14905193448066711, 'timestamp': '2025-09-10 02:45:39.495784', 'step': 9228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:39.551824', 'step': 9228, 'epoch': 2} {'type': 'loss', 'content': 0.12992891669273376, 'timestamp': '2025-09-10 02:45:39.556501', 'step': 9229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:39.623775', 'step': 9229, 'epoch': 2} {'type': 'loss', 'content': 0.14321155846118927, 'timestamp': '2025-09-10 02:45:39.625944', 'step': 9230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:39.679310', 'step': 9230, 'epoch': 2} {'type': 'loss', 'content': 0.11424258351325989, 'timestamp': '2025-09-10 02:45:39.685318', 'step': 9231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:39.738717', 'step': 9231, 'epoch': 2} {'type': 'loss', 'content': 0.0840146392583847, 'timestamp': '2025-09-10 02:45:39.749375', 'step': 9232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:39.815498', 'step': 9232, 'epoch': 2} {'type': 'loss', 'content': 0.183223158121109, 'timestamp': '2025-09-10 02:45:39.819738', 'step': 9233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:39.876503', 'step': 9233, 'epoch': 2} {'type': 'loss', 'content': 0.1732870489358902, 'timestamp': '2025-09-10 02:45:39.880353', 'step': 9234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:39.938519', 'step': 9234, 'epoch': 2} {'type': 'loss', 'content': 0.17455604672431946, 'timestamp': '2025-09-10 02:45:39.940570', 'step': 9235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:39.994222', 'step': 9235, 'epoch': 2} {'type': 'loss', 'content': 0.12623654305934906, 'timestamp': '2025-09-10 02:45:40.003426', 'step': 9236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:40.060099', 'step': 9236, 'epoch': 2} {'type': 'loss', 'content': 0.16610777378082275, 'timestamp': '2025-09-10 02:45:40.062460', 'step': 9237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:40.116864', 'step': 9237, 'epoch': 2} {'type': 'loss', 'content': 0.16748808324337006, 'timestamp': '2025-09-10 02:45:40.120635', 'step': 9238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:40.174430', 'step': 9238, 'epoch': 2} {'type': 'loss', 'content': 0.1448969841003418, 'timestamp': '2025-09-10 02:45:40.176777', 'step': 9239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:40.232745', 'step': 9239, 'epoch': 2} {'type': 'loss', 'content': 0.16109170019626617, 'timestamp': '2025-09-10 02:45:40.238653', 'step': 9240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:40.291882', 'step': 9240, 'epoch': 2} {'type': 'loss', 'content': 0.20474621653556824, 'timestamp': '2025-09-10 02:45:40.294130', 'step': 9241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:40.360540', 'step': 9241, 'epoch': 2} {'type': 'loss', 'content': 0.13799986243247986, 'timestamp': '2025-09-10 02:45:40.364198', 'step': 9242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:40.418339', 'step': 9242, 'epoch': 2} {'type': 'loss', 'content': 0.11291700601577759, 'timestamp': '2025-09-10 02:45:40.421749', 'step': 9243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:40.475846', 'step': 9243, 'epoch': 2} {'type': 'loss', 'content': 0.15413905680179596, 'timestamp': '2025-09-10 02:45:40.481663', 'step': 9244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:40.536498', 'step': 9244, 'epoch': 2} {'type': 'loss', 'content': 0.1307460516691208, 'timestamp': '2025-09-10 02:45:40.541079', 'step': 9245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:40.610572', 'step': 9245, 'epoch': 2} {'type': 'loss', 'content': 0.057630088180303574, 'timestamp': '2025-09-10 02:45:40.612923', 'step': 9246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:40.672816', 'step': 9246, 'epoch': 2} {'type': 'loss', 'content': 0.1042821854352951, 'timestamp': '2025-09-10 02:45:40.674974', 'step': 9247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:40.728341', 'step': 9247, 'epoch': 2} {'type': 'loss', 'content': 0.09081985056400299, 'timestamp': '2025-09-10 02:45:40.738706', 'step': 9248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:40.795336', 'step': 9248, 'epoch': 2} {'type': 'loss', 'content': 0.12945319712162018, 'timestamp': '2025-09-10 02:45:40.798531', 'step': 9249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:40.854620', 'step': 9249, 'epoch': 2} {'type': 'loss', 'content': 0.18624921143054962, 'timestamp': '2025-09-10 02:45:40.857353', 'step': 9250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:40.911834', 'step': 9250, 'epoch': 2} {'type': 'loss', 'content': 0.12464644759893417, 'timestamp': '2025-09-10 02:45:40.914045', 'step': 9251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:40.972420', 'step': 9251, 'epoch': 2} {'type': 'loss', 'content': 0.14022932946681976, 'timestamp': '2025-09-10 02:45:40.978224', 'step': 9252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:41.031329', 'step': 9252, 'epoch': 2} {'type': 'loss', 'content': 0.10785144567489624, 'timestamp': '2025-09-10 02:45:41.033614', 'step': 9253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:41.095727', 'step': 9253, 'epoch': 2} {'type': 'loss', 'content': 0.18468335270881653, 'timestamp': '2025-09-10 02:45:41.100607', 'step': 9254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:41.159326', 'step': 9254, 'epoch': 2} {'type': 'loss', 'content': 0.06498969346284866, 'timestamp': '2025-09-10 02:45:41.162655', 'step': 9255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:41.220314', 'step': 9255, 'epoch': 2} {'type': 'loss', 'content': 0.17262500524520874, 'timestamp': '2025-09-10 02:45:41.226418', 'step': 9256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:41.283820', 'step': 9256, 'epoch': 2} {'type': 'loss', 'content': 0.2204376608133316, 'timestamp': '2025-09-10 02:45:41.286489', 'step': 9257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:45:41.347406', 'step': 9257, 'epoch': 2} {'type': 'loss', 'content': 0.22824905812740326, 'timestamp': '2025-09-10 02:45:41.349734', 'step': 9258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:41.403117', 'step': 9258, 'epoch': 2} {'type': 'loss', 'content': 0.15474076569080353, 'timestamp': '2025-09-10 02:45:41.405362', 'step': 9259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:41.458869', 'step': 9259, 'epoch': 2} {'type': 'loss', 'content': 0.10865510255098343, 'timestamp': '2025-09-10 02:45:41.465963', 'step': 9260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:41.521051', 'step': 9260, 'epoch': 2} {'type': 'loss', 'content': 0.1468498408794403, 'timestamp': '2025-09-10 02:45:41.527968', 'step': 9261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:41.596904', 'step': 9261, 'epoch': 2} {'type': 'loss', 'content': 0.06320523470640182, 'timestamp': '2025-09-10 02:45:41.599093', 'step': 9262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:41.659218', 'step': 9262, 'epoch': 2} {'type': 'loss', 'content': 0.1100827306509018, 'timestamp': '2025-09-10 02:45:41.662755', 'step': 9263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:41.716916', 'step': 9263, 'epoch': 2} {'type': 'loss', 'content': 0.1585170179605484, 'timestamp': '2025-09-10 02:45:41.723598', 'step': 9264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:41.778151', 'step': 9264, 'epoch': 2} {'type': 'loss', 'content': 0.16876277327537537, 'timestamp': '2025-09-10 02:45:41.780363', 'step': 9265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:41.833842', 'step': 9265, 'epoch': 2} {'type': 'loss', 'content': 0.18549282848834991, 'timestamp': '2025-09-10 02:45:41.836113', 'step': 9266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:41.890632', 'step': 9266, 'epoch': 2} {'type': 'loss', 'content': 0.12135031074285507, 'timestamp': '2025-09-10 02:45:41.892830', 'step': 9267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:41.946801', 'step': 9267, 'epoch': 2} {'type': 'loss', 'content': 0.14125710725784302, 'timestamp': '2025-09-10 02:45:41.952939', 'step': 9268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:42.013300', 'step': 9268, 'epoch': 2} {'type': 'loss', 'content': 0.12288275361061096, 'timestamp': '2025-09-10 02:45:42.015587', 'step': 9269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:42.069060', 'step': 9269, 'epoch': 2} {'type': 'loss', 'content': 0.1430894434452057, 'timestamp': '2025-09-10 02:45:42.071288', 'step': 9270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:42.129643', 'step': 9270, 'epoch': 2} {'type': 'loss', 'content': 0.08524300158023834, 'timestamp': '2025-09-10 02:45:42.131926', 'step': 9271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:42.194267', 'step': 9271, 'epoch': 2} {'type': 'loss', 'content': 0.1172148808836937, 'timestamp': '2025-09-10 02:45:42.204232', 'step': 9272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:42.262090', 'step': 9272, 'epoch': 2} {'type': 'loss', 'content': 0.12352132797241211, 'timestamp': '2025-09-10 02:45:42.264480', 'step': 9273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:42.320763', 'step': 9273, 'epoch': 2} {'type': 'loss', 'content': 0.08953379839658737, 'timestamp': '2025-09-10 02:45:42.323008', 'step': 9274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:42.379371', 'step': 9274, 'epoch': 2} {'type': 'loss', 'content': 0.08591382205486298, 'timestamp': '2025-09-10 02:45:42.381574', 'step': 9275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:42.441801', 'step': 9275, 'epoch': 2} {'type': 'loss', 'content': 0.15377408266067505, 'timestamp': '2025-09-10 02:45:42.451328', 'step': 9276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:42.509245', 'step': 9276, 'epoch': 2} {'type': 'loss', 'content': 0.11947233974933624, 'timestamp': '2025-09-10 02:45:42.511848', 'step': 9277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:42.567977', 'step': 9277, 'epoch': 2} {'type': 'loss', 'content': 0.061487942934036255, 'timestamp': '2025-09-10 02:45:42.570825', 'step': 9278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:42.627446', 'step': 9278, 'epoch': 2} {'type': 'loss', 'content': 0.14970698952674866, 'timestamp': '2025-09-10 02:45:42.629995', 'step': 9279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:42.687625', 'step': 9279, 'epoch': 2} {'type': 'loss', 'content': 0.17688405513763428, 'timestamp': '2025-09-10 02:45:42.696374', 'step': 9280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:42.753126', 'step': 9280, 'epoch': 2} {'type': 'loss', 'content': 0.08920987695455551, 'timestamp': '2025-09-10 02:45:42.756013', 'step': 9281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:42.812567', 'step': 9281, 'epoch': 2} {'type': 'loss', 'content': 0.14819148182868958, 'timestamp': '2025-09-10 02:45:42.816823', 'step': 9282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:42.871354', 'step': 9282, 'epoch': 2} {'type': 'loss', 'content': 0.12357502430677414, 'timestamp': '2025-09-10 02:45:42.873680', 'step': 9283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:42.931520', 'step': 9283, 'epoch': 2} {'type': 'loss', 'content': 0.1150437593460083, 'timestamp': '2025-09-10 02:45:42.938937', 'step': 9284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:43.000127', 'step': 9284, 'epoch': 2} {'type': 'loss', 'content': 0.21312536299228668, 'timestamp': '2025-09-10 02:45:43.002314', 'step': 9285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:43.058662', 'step': 9285, 'epoch': 2} {'type': 'loss', 'content': 0.12204859405755997, 'timestamp': '2025-09-10 02:45:43.060706', 'step': 9286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:43.114746', 'step': 9286, 'epoch': 2} {'type': 'loss', 'content': 0.17118869721889496, 'timestamp': '2025-09-10 02:45:43.118795', 'step': 9287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:43.174094', 'step': 9287, 'epoch': 2} {'type': 'loss', 'content': 0.07135585695505142, 'timestamp': '2025-09-10 02:45:43.180522', 'step': 9288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:43.236784', 'step': 9288, 'epoch': 2} {'type': 'loss', 'content': 0.131384015083313, 'timestamp': '2025-09-10 02:45:43.239200', 'step': 9289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:43.311187', 'step': 9289, 'epoch': 2} {'type': 'loss', 'content': 0.05756557360291481, 'timestamp': '2025-09-10 02:45:43.313482', 'step': 9290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:43.373505', 'step': 9290, 'epoch': 2} {'type': 'loss', 'content': 0.141642227768898, 'timestamp': '2025-09-10 02:45:43.375622', 'step': 9291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:43.432454', 'step': 9291, 'epoch': 2} {'type': 'loss', 'content': 0.14485354721546173, 'timestamp': '2025-09-10 02:45:43.438780', 'step': 9292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:43.495868', 'step': 9292, 'epoch': 2} {'type': 'loss', 'content': 0.16372531652450562, 'timestamp': '2025-09-10 02:45:43.498016', 'step': 9293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:43.553030', 'step': 9293, 'epoch': 2} {'type': 'loss', 'content': 0.17647351324558258, 'timestamp': '2025-09-10 02:45:43.558564', 'step': 9294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:43.616555', 'step': 9294, 'epoch': 2} {'type': 'loss', 'content': 0.059703994542360306, 'timestamp': '2025-09-10 02:45:43.618739', 'step': 9295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:43.672065', 'step': 9295, 'epoch': 2} {'type': 'loss', 'content': 0.16374564170837402, 'timestamp': '2025-09-10 02:45:43.678961', 'step': 9296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:43.736547', 'step': 9296, 'epoch': 2} {'type': 'loss', 'content': 0.07094306498765945, 'timestamp': '2025-09-10 02:45:43.738929', 'step': 9297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:45:43.793664', 'step': 9297, 'epoch': 2} {'type': 'loss', 'content': 0.18635722994804382, 'timestamp': '2025-09-10 02:45:43.800077', 'step': 9298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:43.856917', 'step': 9298, 'epoch': 2} {'type': 'loss', 'content': 0.18790121376514435, 'timestamp': '2025-09-10 02:45:43.859095', 'step': 9299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:43.913323', 'step': 9299, 'epoch': 2} {'type': 'loss', 'content': 0.20455515384674072, 'timestamp': '2025-09-10 02:45:43.926257', 'step': 9300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:43.980622', 'step': 9300, 'epoch': 2} {'type': 'loss', 'content': 0.07277932018041611, 'timestamp': '2025-09-10 02:45:43.990324', 'step': 9301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:44.046289', 'step': 9301, 'epoch': 2} {'type': 'loss', 'content': 0.1287355273962021, 'timestamp': '2025-09-10 02:45:44.055305', 'step': 9302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:44.115419', 'step': 9302, 'epoch': 2} {'type': 'loss', 'content': 0.12474199384450912, 'timestamp': '2025-09-10 02:45:44.117693', 'step': 9303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:44.173722', 'step': 9303, 'epoch': 2} {'type': 'loss', 'content': 0.15843671560287476, 'timestamp': '2025-09-10 02:45:44.179798', 'step': 9304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:44.238255', 'step': 9304, 'epoch': 2} {'type': 'loss', 'content': 0.1119849905371666, 'timestamp': '2025-09-10 02:45:44.240674', 'step': 9305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:44.300405', 'step': 9305, 'epoch': 2} {'type': 'loss', 'content': 0.1557469666004181, 'timestamp': '2025-09-10 02:45:44.302409', 'step': 9306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:44.356178', 'step': 9306, 'epoch': 2} {'type': 'loss', 'content': 0.12459491193294525, 'timestamp': '2025-09-10 02:45:44.364506', 'step': 9307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:44.432114', 'step': 9307, 'epoch': 2} {'type': 'loss', 'content': 0.14837490022182465, 'timestamp': '2025-09-10 02:45:44.438551', 'step': 9308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:44.504213', 'step': 9308, 'epoch': 2} {'type': 'loss', 'content': 0.14960850775241852, 'timestamp': '2025-09-10 02:45:44.508700', 'step': 9309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:44.571019', 'step': 9309, 'epoch': 2} {'type': 'loss', 'content': 0.1066817194223404, 'timestamp': '2025-09-10 02:45:44.574585', 'step': 9310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:44.641964', 'step': 9310, 'epoch': 2} {'type': 'loss', 'content': 0.10318863391876221, 'timestamp': '2025-09-10 02:45:44.646420', 'step': 9311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:44.703841', 'step': 9311, 'epoch': 2} {'type': 'loss', 'content': 0.16491517424583435, 'timestamp': '2025-09-10 02:45:44.710244', 'step': 9312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:44.764130', 'step': 9312, 'epoch': 2} {'type': 'loss', 'content': 0.170639306306839, 'timestamp': '2025-09-10 02:45:44.766302', 'step': 9313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:44.826017', 'step': 9313, 'epoch': 2} {'type': 'loss', 'content': 0.10779724270105362, 'timestamp': '2025-09-10 02:45:44.831998', 'step': 9314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:44.887813', 'step': 9314, 'epoch': 2} {'type': 'loss', 'content': 0.11710292100906372, 'timestamp': '2025-09-10 02:45:44.889918', 'step': 9315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:44.948929', 'step': 9315, 'epoch': 2} {'type': 'loss', 'content': 0.10653866082429886, 'timestamp': '2025-09-10 02:45:44.955002', 'step': 9316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:45.011713', 'step': 9316, 'epoch': 2} {'type': 'loss', 'content': 0.09483093023300171, 'timestamp': '2025-09-10 02:45:45.019433', 'step': 9317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:45:45.078685', 'step': 9317, 'epoch': 2} {'type': 'loss', 'content': 0.059204161167144775, 'timestamp': '2025-09-10 02:45:45.080850', 'step': 9318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:45.135325', 'step': 9318, 'epoch': 2} {'type': 'loss', 'content': 0.07387764006853104, 'timestamp': '2025-09-10 02:45:45.137346', 'step': 9319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:45.190380', 'step': 9319, 'epoch': 2} {'type': 'loss', 'content': 0.10800739377737045, 'timestamp': '2025-09-10 02:45:45.196444', 'step': 9320, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:45:59.748027', 'step': 9320, 'epoch': 2} {'type': 'pplx', 'content': 12810.814651882956, 'timestamp': '2025-09-10 02:45:59.750999', 'step': 9320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:45:59.805035', 'step': 9320, 'epoch': 2} {'type': 'loss', 'content': 0.10837974399328232, 'timestamp': '2025-09-10 02:45:59.807234', 'step': 9321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:59.862405', 'step': 9321, 'epoch': 2} {'type': 'loss', 'content': 0.17091314494609833, 'timestamp': '2025-09-10 02:45:59.864373', 'step': 9322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:59.918450', 'step': 9322, 'epoch': 2} {'type': 'loss', 'content': 0.13631980121135712, 'timestamp': '2025-09-10 02:45:59.920455', 'step': 9323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:45:59.975234', 'step': 9323, 'epoch': 2} {'type': 'loss', 'content': 0.22443446516990662, 'timestamp': '2025-09-10 02:45:59.981500', 'step': 9324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:00.035736', 'step': 9324, 'epoch': 2} {'type': 'loss', 'content': 0.13799959421157837, 'timestamp': '2025-09-10 02:46:00.037961', 'step': 9325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:00.092721', 'step': 9325, 'epoch': 2} {'type': 'loss', 'content': 0.1606011539697647, 'timestamp': '2025-09-10 02:46:00.094542', 'step': 9326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:00.147962', 'step': 9326, 'epoch': 2} {'type': 'loss', 'content': 0.07775510847568512, 'timestamp': '2025-09-10 02:46:00.149669', 'step': 9327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:00.203182', 'step': 9327, 'epoch': 2} {'type': 'loss', 'content': 0.19664767384529114, 'timestamp': '2025-09-10 02:46:00.208826', 'step': 9328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:00.261339', 'step': 9328, 'epoch': 2} {'type': 'loss', 'content': 0.08522764593362808, 'timestamp': '2025-09-10 02:46:00.263139', 'step': 9329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:00.316280', 'step': 9329, 'epoch': 2} {'type': 'loss', 'content': 0.13130173087120056, 'timestamp': '2025-09-10 02:46:00.318230', 'step': 9330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:00.371207', 'step': 9330, 'epoch': 2} {'type': 'loss', 'content': 0.1853129118680954, 'timestamp': '2025-09-10 02:46:00.373816', 'step': 9331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:00.427101', 'step': 9331, 'epoch': 2} {'type': 'loss', 'content': 0.15031170845031738, 'timestamp': '2025-09-10 02:46:00.433176', 'step': 9332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:00.486393', 'step': 9332, 'epoch': 2} {'type': 'loss', 'content': 0.1534424126148224, 'timestamp': '2025-09-10 02:46:00.488572', 'step': 9333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:00.541638', 'step': 9333, 'epoch': 2} {'type': 'loss', 'content': 0.09243486821651459, 'timestamp': '2025-09-10 02:46:00.543997', 'step': 9334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:00.598831', 'step': 9334, 'epoch': 2} {'type': 'loss', 'content': 0.12669934332370758, 'timestamp': '2025-09-10 02:46:00.602056', 'step': 9335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:00.657690', 'step': 9335, 'epoch': 2} {'type': 'loss', 'content': 0.10343828052282333, 'timestamp': '2025-09-10 02:46:00.663448', 'step': 9336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:00.716388', 'step': 9336, 'epoch': 2} {'type': 'loss', 'content': 0.19210731983184814, 'timestamp': '2025-09-10 02:46:00.718106', 'step': 9337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:00.771263', 'step': 9337, 'epoch': 2} {'type': 'loss', 'content': 0.10450781136751175, 'timestamp': '2025-09-10 02:46:00.773042', 'step': 9338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:00.825785', 'step': 9338, 'epoch': 2} {'type': 'loss', 'content': 0.10091259330511093, 'timestamp': '2025-09-10 02:46:00.827899', 'step': 9339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:00.882115', 'step': 9339, 'epoch': 2} {'type': 'loss', 'content': 0.09122522920370102, 'timestamp': '2025-09-10 02:46:00.888067', 'step': 9340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:00.941319', 'step': 9340, 'epoch': 2} {'type': 'loss', 'content': 0.1941051036119461, 'timestamp': '2025-09-10 02:46:00.943395', 'step': 9341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:00.996355', 'step': 9341, 'epoch': 2} {'type': 'loss', 'content': 0.06394845247268677, 'timestamp': '2025-09-10 02:46:00.998501', 'step': 9342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:01.052908', 'step': 9342, 'epoch': 2} {'type': 'loss', 'content': 0.143142968416214, 'timestamp': '2025-09-10 02:46:01.054847', 'step': 9343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:01.108870', 'step': 9343, 'epoch': 2} {'type': 'loss', 'content': 0.11791951209306717, 'timestamp': '2025-09-10 02:46:01.114627', 'step': 9344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:01.167675', 'step': 9344, 'epoch': 2} {'type': 'loss', 'content': 0.09919055551290512, 'timestamp': '2025-09-10 02:46:01.169350', 'step': 9345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:01.222651', 'step': 9345, 'epoch': 2} {'type': 'loss', 'content': 0.1083284541964531, 'timestamp': '2025-09-10 02:46:01.224783', 'step': 9346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:01.278642', 'step': 9346, 'epoch': 2} {'type': 'loss', 'content': 0.10724049806594849, 'timestamp': '2025-09-10 02:46:01.280713', 'step': 9347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:01.333439', 'step': 9347, 'epoch': 2} {'type': 'loss', 'content': 0.14522381126880646, 'timestamp': '2025-09-10 02:46:01.339440', 'step': 9348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:01.392731', 'step': 9348, 'epoch': 2} {'type': 'loss', 'content': 0.0974576324224472, 'timestamp': '2025-09-10 02:46:01.395032', 'step': 9349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:01.448332', 'step': 9349, 'epoch': 2} {'type': 'loss', 'content': 0.1911814957857132, 'timestamp': '2025-09-10 02:46:01.450466', 'step': 9350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:01.504444', 'step': 9350, 'epoch': 2} {'type': 'loss', 'content': 0.12156420946121216, 'timestamp': '2025-09-10 02:46:01.506351', 'step': 9351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:01.560779', 'step': 9351, 'epoch': 2} {'type': 'loss', 'content': 0.11637900024652481, 'timestamp': '2025-09-10 02:46:01.566308', 'step': 9352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:01.618974', 'step': 9352, 'epoch': 2} {'type': 'loss', 'content': 0.08469363301992416, 'timestamp': '2025-09-10 02:46:01.620722', 'step': 9353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:01.674135', 'step': 9353, 'epoch': 2} {'type': 'loss', 'content': 0.12200885266065598, 'timestamp': '2025-09-10 02:46:01.676349', 'step': 9354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:01.730738', 'step': 9354, 'epoch': 2} {'type': 'loss', 'content': 0.12444794923067093, 'timestamp': '2025-09-10 02:46:01.732857', 'step': 9355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:01.786222', 'step': 9355, 'epoch': 2} {'type': 'loss', 'content': 0.16802407801151276, 'timestamp': '2025-09-10 02:46:01.792468', 'step': 9356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:01.845241', 'step': 9356, 'epoch': 2} {'type': 'loss', 'content': 0.10617988556623459, 'timestamp': '2025-09-10 02:46:01.847418', 'step': 9357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:01.900259', 'step': 9357, 'epoch': 2} {'type': 'loss', 'content': 0.07908699661493301, 'timestamp': '2025-09-10 02:46:01.902433', 'step': 9358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:01.956020', 'step': 9358, 'epoch': 2} {'type': 'loss', 'content': 0.09583119302988052, 'timestamp': '2025-09-10 02:46:01.958097', 'step': 9359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:02.011751', 'step': 9359, 'epoch': 2} {'type': 'loss', 'content': 0.14202573895454407, 'timestamp': '2025-09-10 02:46:02.017292', 'step': 9360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:02.070094', 'step': 9360, 'epoch': 2} {'type': 'loss', 'content': 0.1578577160835266, 'timestamp': '2025-09-10 02:46:02.071885', 'step': 9361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:02.125383', 'step': 9361, 'epoch': 2} {'type': 'loss', 'content': 0.19066239893436432, 'timestamp': '2025-09-10 02:46:02.127076', 'step': 9362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:02.180703', 'step': 9362, 'epoch': 2} {'type': 'loss', 'content': 0.24238567054271698, 'timestamp': '2025-09-10 02:46:02.183027', 'step': 9363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:02.238719', 'step': 9363, 'epoch': 2} {'type': 'loss', 'content': 0.21772406995296478, 'timestamp': '2025-09-10 02:46:02.245005', 'step': 9364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:02.298505', 'step': 9364, 'epoch': 2} {'type': 'loss', 'content': 0.13547852635383606, 'timestamp': '2025-09-10 02:46:02.300711', 'step': 9365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:02.354397', 'step': 9365, 'epoch': 2} {'type': 'loss', 'content': 0.1895153969526291, 'timestamp': '2025-09-10 02:46:02.356693', 'step': 9366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:02.410622', 'step': 9366, 'epoch': 2} {'type': 'loss', 'content': 0.15525935590267181, 'timestamp': '2025-09-10 02:46:02.412863', 'step': 9367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:02.466332', 'step': 9367, 'epoch': 2} {'type': 'loss', 'content': 0.15144801139831543, 'timestamp': '2025-09-10 02:46:02.473006', 'step': 9368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:02.526504', 'step': 9368, 'epoch': 2} {'type': 'loss', 'content': 0.17394186556339264, 'timestamp': '2025-09-10 02:46:02.532349', 'step': 9369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:02.591210', 'step': 9369, 'epoch': 2} {'type': 'loss', 'content': 0.15287114679813385, 'timestamp': '2025-09-10 02:46:02.593032', 'step': 9370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:02.646616', 'step': 9370, 'epoch': 2} {'type': 'loss', 'content': 0.12783336639404297, 'timestamp': '2025-09-10 02:46:02.648349', 'step': 9371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:02.702460', 'step': 9371, 'epoch': 2} {'type': 'loss', 'content': 0.16338476538658142, 'timestamp': '2025-09-10 02:46:02.708225', 'step': 9372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:02.762187', 'step': 9372, 'epoch': 2} {'type': 'loss', 'content': 0.10047348588705063, 'timestamp': '2025-09-10 02:46:02.767003', 'step': 9373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:02.822507', 'step': 9373, 'epoch': 2} {'type': 'loss', 'content': 0.11874613910913467, 'timestamp': '2025-09-10 02:46:02.824721', 'step': 9374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:02.888079', 'step': 9374, 'epoch': 2} {'type': 'loss', 'content': 0.1481419801712036, 'timestamp': '2025-09-10 02:46:02.890192', 'step': 9375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:02.945502', 'step': 9375, 'epoch': 2} {'type': 'loss', 'content': 0.12020685523748398, 'timestamp': '2025-09-10 02:46:02.953687', 'step': 9376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:03.013671', 'step': 9376, 'epoch': 2} {'type': 'loss', 'content': 0.11979816108942032, 'timestamp': '2025-09-10 02:46:03.016477', 'step': 9377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:03.073084', 'step': 9377, 'epoch': 2} {'type': 'loss', 'content': 0.14879348874092102, 'timestamp': '2025-09-10 02:46:03.074859', 'step': 9378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:03.128899', 'step': 9378, 'epoch': 2} {'type': 'loss', 'content': 0.12370369583368301, 'timestamp': '2025-09-10 02:46:03.130816', 'step': 9379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:03.184266', 'step': 9379, 'epoch': 2} {'type': 'loss', 'content': 0.10740088671445847, 'timestamp': '2025-09-10 02:46:03.189736', 'step': 9380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:03.242628', 'step': 9380, 'epoch': 2} {'type': 'loss', 'content': 0.18583150207996368, 'timestamp': '2025-09-10 02:46:03.244910', 'step': 9381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:03.297948', 'step': 9381, 'epoch': 2} {'type': 'loss', 'content': 0.09320072084665298, 'timestamp': '2025-09-10 02:46:03.300464', 'step': 9382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:03.353136', 'step': 9382, 'epoch': 2} {'type': 'loss', 'content': 0.10083366930484772, 'timestamp': '2025-09-10 02:46:03.355559', 'step': 9383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:03.408388', 'step': 9383, 'epoch': 2} {'type': 'loss', 'content': 0.18167178332805634, 'timestamp': '2025-09-10 02:46:03.414526', 'step': 9384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:03.467392', 'step': 9384, 'epoch': 2} {'type': 'loss', 'content': 0.17688371241092682, 'timestamp': '2025-09-10 02:46:03.469674', 'step': 9385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:03.523462', 'step': 9385, 'epoch': 2} {'type': 'loss', 'content': 0.20909881591796875, 'timestamp': '2025-09-10 02:46:03.525353', 'step': 9386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:03.578622', 'step': 9386, 'epoch': 2} {'type': 'loss', 'content': 0.0694778561592102, 'timestamp': '2025-09-10 02:46:03.581014', 'step': 9387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:03.635029', 'step': 9387, 'epoch': 2} {'type': 'loss', 'content': 0.16170595586299896, 'timestamp': '2025-09-10 02:46:03.641251', 'step': 9388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:03.695809', 'step': 9388, 'epoch': 2} {'type': 'loss', 'content': 0.15263643860816956, 'timestamp': '2025-09-10 02:46:03.698117', 'step': 9389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:03.750588', 'step': 9389, 'epoch': 2} {'type': 'loss', 'content': 0.10933233052492142, 'timestamp': '2025-09-10 02:46:03.752943', 'step': 9390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:03.806494', 'step': 9390, 'epoch': 2} {'type': 'loss', 'content': 0.1627436876296997, 'timestamp': '2025-09-10 02:46:03.808768', 'step': 9391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:03.862299', 'step': 9391, 'epoch': 2} {'type': 'loss', 'content': 0.1114402636885643, 'timestamp': '2025-09-10 02:46:03.868691', 'step': 9392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:03.921897', 'step': 9392, 'epoch': 2} {'type': 'loss', 'content': 0.18898719549179077, 'timestamp': '2025-09-10 02:46:03.923953', 'step': 9393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:03.977833', 'step': 9393, 'epoch': 2} {'type': 'loss', 'content': 0.13904935121536255, 'timestamp': '2025-09-10 02:46:03.979689', 'step': 9394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:04.032791', 'step': 9394, 'epoch': 2} {'type': 'loss', 'content': 0.11203394830226898, 'timestamp': '2025-09-10 02:46:04.034930', 'step': 9395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:04.088191', 'step': 9395, 'epoch': 2} {'type': 'loss', 'content': 0.17848968505859375, 'timestamp': '2025-09-10 02:46:04.094419', 'step': 9396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:04.147298', 'step': 9396, 'epoch': 2} {'type': 'loss', 'content': 0.10967220366001129, 'timestamp': '2025-09-10 02:46:04.149568', 'step': 9397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:04.202932', 'step': 9397, 'epoch': 2} {'type': 'loss', 'content': 0.16723433136940002, 'timestamp': '2025-09-10 02:46:04.205200', 'step': 9398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:04.258714', 'step': 9398, 'epoch': 2} {'type': 'loss', 'content': 0.18416821956634521, 'timestamp': '2025-09-10 02:46:04.261032', 'step': 9399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:04.316939', 'step': 9399, 'epoch': 2} {'type': 'loss', 'content': 0.15507017076015472, 'timestamp': '2025-09-10 02:46:04.323780', 'step': 9400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:04.378855', 'step': 9400, 'epoch': 2} {'type': 'loss', 'content': 0.22289255261421204, 'timestamp': '2025-09-10 02:46:04.381148', 'step': 9401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:04.436015', 'step': 9401, 'epoch': 2} {'type': 'loss', 'content': 0.14040949940681458, 'timestamp': '2025-09-10 02:46:04.438402', 'step': 9402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:04.493449', 'step': 9402, 'epoch': 2} {'type': 'loss', 'content': 0.11247528344392776, 'timestamp': '2025-09-10 02:46:04.495841', 'step': 9403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:04.550119', 'step': 9403, 'epoch': 2} {'type': 'loss', 'content': 0.1926143914461136, 'timestamp': '2025-09-10 02:46:04.556553', 'step': 9404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:04.610002', 'step': 9404, 'epoch': 2} {'type': 'loss', 'content': 0.21736249327659607, 'timestamp': '2025-09-10 02:46:04.612147', 'step': 9405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:04.666512', 'step': 9405, 'epoch': 2} {'type': 'loss', 'content': 0.13146187365055084, 'timestamp': '2025-09-10 02:46:04.668752', 'step': 9406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:04.723867', 'step': 9406, 'epoch': 2} {'type': 'loss', 'content': 0.13027770817279816, 'timestamp': '2025-09-10 02:46:04.726494', 'step': 9407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:04.781356', 'step': 9407, 'epoch': 2} {'type': 'loss', 'content': 0.14871792495250702, 'timestamp': '2025-09-10 02:46:04.787846', 'step': 9408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:04.841921', 'step': 9408, 'epoch': 2} {'type': 'loss', 'content': 0.2563295066356659, 'timestamp': '2025-09-10 02:46:04.844113', 'step': 9409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:04.899246', 'step': 9409, 'epoch': 2} {'type': 'loss', 'content': 0.12371449172496796, 'timestamp': '2025-09-10 02:46:04.901534', 'step': 9410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:04.955133', 'step': 9410, 'epoch': 2} {'type': 'loss', 'content': 0.24941973388195038, 'timestamp': '2025-09-10 02:46:04.957293', 'step': 9411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:05.010957', 'step': 9411, 'epoch': 2} {'type': 'loss', 'content': 0.09677756577730179, 'timestamp': '2025-09-10 02:46:05.017302', 'step': 9412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:05.070941', 'step': 9412, 'epoch': 2} {'type': 'loss', 'content': 0.14773084223270416, 'timestamp': '2025-09-10 02:46:05.073085', 'step': 9413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:05.127782', 'step': 9413, 'epoch': 2} {'type': 'loss', 'content': 0.08543190360069275, 'timestamp': '2025-09-10 02:46:05.130035', 'step': 9414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:05.183928', 'step': 9414, 'epoch': 2} {'type': 'loss', 'content': 0.10782621055841446, 'timestamp': '2025-09-10 02:46:05.186270', 'step': 9415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:05.241307', 'step': 9415, 'epoch': 2} {'type': 'loss', 'content': 0.12342555075883865, 'timestamp': '2025-09-10 02:46:05.247709', 'step': 9416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:05.301433', 'step': 9416, 'epoch': 2} {'type': 'loss', 'content': 0.12727044522762299, 'timestamp': '2025-09-10 02:46:05.304109', 'step': 9417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:05.357955', 'step': 9417, 'epoch': 2} {'type': 'loss', 'content': 0.1104028970003128, 'timestamp': '2025-09-10 02:46:05.360458', 'step': 9418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:05.415761', 'step': 9418, 'epoch': 2} {'type': 'loss', 'content': 0.10699589550495148, 'timestamp': '2025-09-10 02:46:05.418487', 'step': 9419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:05.474163', 'step': 9419, 'epoch': 2} {'type': 'loss', 'content': 0.15742127597332, 'timestamp': '2025-09-10 02:46:05.480553', 'step': 9420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:05.533982', 'step': 9420, 'epoch': 2} {'type': 'loss', 'content': 0.06888918578624725, 'timestamp': '2025-09-10 02:46:05.536298', 'step': 9421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:05.591380', 'step': 9421, 'epoch': 2} {'type': 'loss', 'content': 0.10531394183635712, 'timestamp': '2025-09-10 02:46:05.593630', 'step': 9422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:05.647692', 'step': 9422, 'epoch': 2} {'type': 'loss', 'content': 0.09328316152095795, 'timestamp': '2025-09-10 02:46:05.649983', 'step': 9423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:05.704608', 'step': 9423, 'epoch': 2} {'type': 'loss', 'content': 0.0880608782172203, 'timestamp': '2025-09-10 02:46:05.710670', 'step': 9424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:05.763403', 'step': 9424, 'epoch': 2} {'type': 'loss', 'content': 0.08952508121728897, 'timestamp': '2025-09-10 02:46:05.765507', 'step': 9425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:05.818768', 'step': 9425, 'epoch': 2} {'type': 'loss', 'content': 0.10781393945217133, 'timestamp': '2025-09-10 02:46:05.821140', 'step': 9426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:05.875302', 'step': 9426, 'epoch': 2} {'type': 'loss', 'content': 0.2440028339624405, 'timestamp': '2025-09-10 02:46:05.877652', 'step': 9427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:05.931432', 'step': 9427, 'epoch': 2} {'type': 'loss', 'content': 0.11626826226711273, 'timestamp': '2025-09-10 02:46:05.937841', 'step': 9428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:05.994706', 'step': 9428, 'epoch': 2} {'type': 'loss', 'content': 0.09166555106639862, 'timestamp': '2025-09-10 02:46:05.996800', 'step': 9429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:06.053025', 'step': 9429, 'epoch': 2} {'type': 'loss', 'content': 0.11287377774715424, 'timestamp': '2025-09-10 02:46:06.055193', 'step': 9430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:06.109683', 'step': 9430, 'epoch': 2} {'type': 'loss', 'content': 0.17303217947483063, 'timestamp': '2025-09-10 02:46:06.111874', 'step': 9431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:06.165850', 'step': 9431, 'epoch': 2} {'type': 'loss', 'content': 0.08715152740478516, 'timestamp': '2025-09-10 02:46:06.172115', 'step': 9432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:06.225305', 'step': 9432, 'epoch': 2} {'type': 'loss', 'content': 0.095897376537323, 'timestamp': '2025-09-10 02:46:06.227413', 'step': 9433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:06.281547', 'step': 9433, 'epoch': 2} {'type': 'loss', 'content': 0.12605822086334229, 'timestamp': '2025-09-10 02:46:06.284067', 'step': 9434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:06.339585', 'step': 9434, 'epoch': 2} {'type': 'loss', 'content': 0.07978208363056183, 'timestamp': '2025-09-10 02:46:06.341795', 'step': 9435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:06.395364', 'step': 9435, 'epoch': 2} {'type': 'loss', 'content': 0.10072414577007294, 'timestamp': '2025-09-10 02:46:06.401426', 'step': 9436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:06.454347', 'step': 9436, 'epoch': 2} {'type': 'loss', 'content': 0.13188457489013672, 'timestamp': '2025-09-10 02:46:06.456804', 'step': 9437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:06.510894', 'step': 9437, 'epoch': 2} {'type': 'loss', 'content': 0.1120435893535614, 'timestamp': '2025-09-10 02:46:06.513250', 'step': 9438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:06.569372', 'step': 9438, 'epoch': 2} {'type': 'loss', 'content': 0.17020677030086517, 'timestamp': '2025-09-10 02:46:06.571963', 'step': 9439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:06.626526', 'step': 9439, 'epoch': 2} {'type': 'loss', 'content': 0.12536673247814178, 'timestamp': '2025-09-10 02:46:06.632805', 'step': 9440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:06.686835', 'step': 9440, 'epoch': 2} {'type': 'loss', 'content': 0.12032601982355118, 'timestamp': '2025-09-10 02:46:06.693078', 'step': 9441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:06.748478', 'step': 9441, 'epoch': 2} {'type': 'loss', 'content': 0.24018679559230804, 'timestamp': '2025-09-10 02:46:06.750873', 'step': 9442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:06.805102', 'step': 9442, 'epoch': 2} {'type': 'loss', 'content': 0.1428825408220291, 'timestamp': '2025-09-10 02:46:06.807298', 'step': 9443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:06.865157', 'step': 9443, 'epoch': 2} {'type': 'loss', 'content': 0.1885755956172943, 'timestamp': '2025-09-10 02:46:06.871523', 'step': 9444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:06.925246', 'step': 9444, 'epoch': 2} {'type': 'loss', 'content': 0.0937628298997879, 'timestamp': '2025-09-10 02:46:06.928735', 'step': 9445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:06.985543', 'step': 9445, 'epoch': 2} {'type': 'loss', 'content': 0.09965730458498001, 'timestamp': '2025-09-10 02:46:06.988534', 'step': 9446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:07.043481', 'step': 9446, 'epoch': 2} {'type': 'loss', 'content': 0.1869250386953354, 'timestamp': '2025-09-10 02:46:07.050431', 'step': 9447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:07.106976', 'step': 9447, 'epoch': 2} {'type': 'loss', 'content': 0.13499142229557037, 'timestamp': '2025-09-10 02:46:07.113435', 'step': 9448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:07.167187', 'step': 9448, 'epoch': 2} {'type': 'loss', 'content': 0.22757485508918762, 'timestamp': '2025-09-10 02:46:07.169513', 'step': 9449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:07.224233', 'step': 9449, 'epoch': 2} {'type': 'loss', 'content': 0.1157592236995697, 'timestamp': '2025-09-10 02:46:07.226794', 'step': 9450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:07.281307', 'step': 9450, 'epoch': 2} {'type': 'loss', 'content': 0.15606416761875153, 'timestamp': '2025-09-10 02:46:07.284596', 'step': 9451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:07.338856', 'step': 9451, 'epoch': 2} {'type': 'loss', 'content': 0.09338410198688507, 'timestamp': '2025-09-10 02:46:07.345112', 'step': 9452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:07.399689', 'step': 9452, 'epoch': 2} {'type': 'loss', 'content': 0.0799146220088005, 'timestamp': '2025-09-10 02:46:07.401898', 'step': 9453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:07.455105', 'step': 9453, 'epoch': 2} {'type': 'loss', 'content': 0.17003023624420166, 'timestamp': '2025-09-10 02:46:07.457207', 'step': 9454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:07.511279', 'step': 9454, 'epoch': 2} {'type': 'loss', 'content': 0.20977802574634552, 'timestamp': '2025-09-10 02:46:07.513560', 'step': 9455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:07.569286', 'step': 9455, 'epoch': 2} {'type': 'loss', 'content': 0.14758668839931488, 'timestamp': '2025-09-10 02:46:07.575545', 'step': 9456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:07.629278', 'step': 9456, 'epoch': 2} {'type': 'loss', 'content': 0.1775941401720047, 'timestamp': '2025-09-10 02:46:07.631600', 'step': 9457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:07.685835', 'step': 9457, 'epoch': 2} {'type': 'loss', 'content': 0.1261754035949707, 'timestamp': '2025-09-10 02:46:07.688064', 'step': 9458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:07.743889', 'step': 9458, 'epoch': 2} {'type': 'loss', 'content': 0.10404620319604874, 'timestamp': '2025-09-10 02:46:07.746087', 'step': 9459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:07.803633', 'step': 9459, 'epoch': 2} {'type': 'loss', 'content': 0.10258577764034271, 'timestamp': '2025-09-10 02:46:07.809917', 'step': 9460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:07.863925', 'step': 9460, 'epoch': 2} {'type': 'loss', 'content': 0.06100457161664963, 'timestamp': '2025-09-10 02:46:07.866049', 'step': 9461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:07.919504', 'step': 9461, 'epoch': 2} {'type': 'loss', 'content': 0.1690695732831955, 'timestamp': '2025-09-10 02:46:07.921893', 'step': 9462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:07.978091', 'step': 9462, 'epoch': 2} {'type': 'loss', 'content': 0.13388027250766754, 'timestamp': '2025-09-10 02:46:07.980284', 'step': 9463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:08.034691', 'step': 9463, 'epoch': 2} {'type': 'loss', 'content': 0.2207973152399063, 'timestamp': '2025-09-10 02:46:08.043200', 'step': 9464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:08.097776', 'step': 9464, 'epoch': 2} {'type': 'loss', 'content': 0.09188849478960037, 'timestamp': '2025-09-10 02:46:08.100019', 'step': 9465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:08.160223', 'step': 9465, 'epoch': 2} {'type': 'loss', 'content': 0.09464361518621445, 'timestamp': '2025-09-10 02:46:08.162834', 'step': 9466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:08.217503', 'step': 9466, 'epoch': 2} {'type': 'loss', 'content': 0.1513006091117859, 'timestamp': '2025-09-10 02:46:08.219921', 'step': 9467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:08.274215', 'step': 9467, 'epoch': 2} {'type': 'loss', 'content': 0.17091763019561768, 'timestamp': '2025-09-10 02:46:08.280712', 'step': 9468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:08.335126', 'step': 9468, 'epoch': 2} {'type': 'loss', 'content': 0.11537016928195953, 'timestamp': '2025-09-10 02:46:08.337160', 'step': 9469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:08.393469', 'step': 9469, 'epoch': 2} {'type': 'loss', 'content': 0.18697518110275269, 'timestamp': '2025-09-10 02:46:08.395857', 'step': 9470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:08.451413', 'step': 9470, 'epoch': 2} {'type': 'loss', 'content': 0.08147972822189331, 'timestamp': '2025-09-10 02:46:08.453736', 'step': 9471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:08.507695', 'step': 9471, 'epoch': 2} {'type': 'loss', 'content': 0.12015309929847717, 'timestamp': '2025-09-10 02:46:08.514106', 'step': 9472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:08.568213', 'step': 9472, 'epoch': 2} {'type': 'loss', 'content': 0.22585242986679077, 'timestamp': '2025-09-10 02:46:08.570671', 'step': 9473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:08.625463', 'step': 9473, 'epoch': 2} {'type': 'loss', 'content': 0.17690542340278625, 'timestamp': '2025-09-10 02:46:08.627866', 'step': 9474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:08.683156', 'step': 9474, 'epoch': 2} {'type': 'loss', 'content': 0.11109870672225952, 'timestamp': '2025-09-10 02:46:08.685385', 'step': 9475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:08.739466', 'step': 9475, 'epoch': 2} {'type': 'loss', 'content': 0.05581975728273392, 'timestamp': '2025-09-10 02:46:08.746109', 'step': 9476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:08.800637', 'step': 9476, 'epoch': 2} {'type': 'loss', 'content': 0.08919285237789154, 'timestamp': '2025-09-10 02:46:08.803463', 'step': 9477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:08.858487', 'step': 9477, 'epoch': 2} {'type': 'loss', 'content': 0.1717437356710434, 'timestamp': '2025-09-10 02:46:08.861128', 'step': 9478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:08.915500', 'step': 9478, 'epoch': 2} {'type': 'loss', 'content': 0.0872105062007904, 'timestamp': '2025-09-10 02:46:08.918235', 'step': 9479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:08.972842', 'step': 9479, 'epoch': 2} {'type': 'loss', 'content': 0.23424889147281647, 'timestamp': '2025-09-10 02:46:08.979417', 'step': 9480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:09.033422', 'step': 9480, 'epoch': 2} {'type': 'loss', 'content': 0.1412079632282257, 'timestamp': '2025-09-10 02:46:09.035815', 'step': 9481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:09.090138', 'step': 9481, 'epoch': 2} {'type': 'loss', 'content': 0.1201607808470726, 'timestamp': '2025-09-10 02:46:09.092638', 'step': 9482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:09.148768', 'step': 9482, 'epoch': 2} {'type': 'loss', 'content': 0.18627874553203583, 'timestamp': '2025-09-10 02:46:09.151086', 'step': 9483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:09.205883', 'step': 9483, 'epoch': 2} {'type': 'loss', 'content': 0.11525177210569382, 'timestamp': '2025-09-10 02:46:09.212230', 'step': 9484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:09.266536', 'step': 9484, 'epoch': 2} {'type': 'loss', 'content': 0.19249650835990906, 'timestamp': '2025-09-10 02:46:09.268966', 'step': 9485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:09.322825', 'step': 9485, 'epoch': 2} {'type': 'loss', 'content': 0.11826913803815842, 'timestamp': '2025-09-10 02:46:09.325203', 'step': 9486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:09.380102', 'step': 9486, 'epoch': 2} {'type': 'loss', 'content': 0.14008811116218567, 'timestamp': '2025-09-10 02:46:09.382475', 'step': 9487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:09.436224', 'step': 9487, 'epoch': 2} {'type': 'loss', 'content': 0.1114986389875412, 'timestamp': '2025-09-10 02:46:09.442564', 'step': 9488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:09.495766', 'step': 9488, 'epoch': 2} {'type': 'loss', 'content': 0.11901009827852249, 'timestamp': '2025-09-10 02:46:09.498121', 'step': 9489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:09.551155', 'step': 9489, 'epoch': 2} {'type': 'loss', 'content': 0.12881222367286682, 'timestamp': '2025-09-10 02:46:09.553617', 'step': 9490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:09.607129', 'step': 9490, 'epoch': 2} {'type': 'loss', 'content': 0.05878099054098129, 'timestamp': '2025-09-10 02:46:09.609489', 'step': 9491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:09.663595', 'step': 9491, 'epoch': 2} {'type': 'loss', 'content': 0.10123411566019058, 'timestamp': '2025-09-10 02:46:09.669840', 'step': 9492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:09.724758', 'step': 9492, 'epoch': 2} {'type': 'loss', 'content': 0.24283716082572937, 'timestamp': '2025-09-10 02:46:09.727289', 'step': 9493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:09.783754', 'step': 9493, 'epoch': 2} {'type': 'loss', 'content': 0.11602745950222015, 'timestamp': '2025-09-10 02:46:09.786293', 'step': 9494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:09.841829', 'step': 9494, 'epoch': 2} {'type': 'loss', 'content': 0.06805597990751266, 'timestamp': '2025-09-10 02:46:09.844308', 'step': 9495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:09.902145', 'step': 9495, 'epoch': 2} {'type': 'loss', 'content': 0.18283303081989288, 'timestamp': '2025-09-10 02:46:09.908651', 'step': 9496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:09.962556', 'step': 9496, 'epoch': 2} {'type': 'loss', 'content': 0.11326105147600174, 'timestamp': '2025-09-10 02:46:09.964830', 'step': 9497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:10.018421', 'step': 9497, 'epoch': 2} {'type': 'loss', 'content': 0.19533374905586243, 'timestamp': '2025-09-10 02:46:10.020722', 'step': 9498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:10.073969', 'step': 9498, 'epoch': 2} {'type': 'loss', 'content': 0.17237263917922974, 'timestamp': '2025-09-10 02:46:10.076280', 'step': 9499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:10.129598', 'step': 9499, 'epoch': 2} {'type': 'loss', 'content': 0.11051584780216217, 'timestamp': '2025-09-10 02:46:10.135955', 'step': 9500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 9500', 'timestamp': '2025-09-10 02:46:10.539692', 'step': 9500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:10.597507', 'step': 9500, 'epoch': 2} {'type': 'loss', 'content': 0.16262806951999664, 'timestamp': '2025-09-10 02:46:10.599823', 'step': 9501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:10.653944', 'step': 9501, 'epoch': 2} {'type': 'loss', 'content': 0.1156286671757698, 'timestamp': '2025-09-10 02:46:10.656362', 'step': 9502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:10.710035', 'step': 9502, 'epoch': 2} {'type': 'loss', 'content': 0.1572743058204651, 'timestamp': '2025-09-10 02:46:10.712463', 'step': 9503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:10.766651', 'step': 9503, 'epoch': 2} {'type': 'loss', 'content': 0.06758023798465729, 'timestamp': '2025-09-10 02:46:10.773010', 'step': 9504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:10.826651', 'step': 9504, 'epoch': 2} {'type': 'loss', 'content': 0.15211129188537598, 'timestamp': '2025-09-10 02:46:10.828953', 'step': 9505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:10.882129', 'step': 9505, 'epoch': 2} {'type': 'loss', 'content': 0.06547584384679794, 'timestamp': '2025-09-10 02:46:10.884243', 'step': 9506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:10.937194', 'step': 9506, 'epoch': 2} {'type': 'loss', 'content': 0.15405909717082977, 'timestamp': '2025-09-10 02:46:10.939552', 'step': 9507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:10.992828', 'step': 9507, 'epoch': 2} {'type': 'loss', 'content': 0.17205530405044556, 'timestamp': '2025-09-10 02:46:10.998971', 'step': 9508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:11.051225', 'step': 9508, 'epoch': 2} {'type': 'loss', 'content': 0.07279461622238159, 'timestamp': '2025-09-10 02:46:11.053457', 'step': 9509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:11.107126', 'step': 9509, 'epoch': 2} {'type': 'loss', 'content': 0.13832801580429077, 'timestamp': '2025-09-10 02:46:11.109445', 'step': 9510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:11.165062', 'step': 9510, 'epoch': 2} {'type': 'loss', 'content': 0.12343406677246094, 'timestamp': '2025-09-10 02:46:11.167486', 'step': 9511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:11.221794', 'step': 9511, 'epoch': 2} {'type': 'loss', 'content': 0.1481640487909317, 'timestamp': '2025-09-10 02:46:11.228267', 'step': 9512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:11.284211', 'step': 9512, 'epoch': 2} {'type': 'loss', 'content': 0.22673895955085754, 'timestamp': '2025-09-10 02:46:11.286635', 'step': 9513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:11.341425', 'step': 9513, 'epoch': 2} {'type': 'loss', 'content': 0.1032634973526001, 'timestamp': '2025-09-10 02:46:11.343657', 'step': 9514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:11.397627', 'step': 9514, 'epoch': 2} {'type': 'loss', 'content': 0.0633348897099495, 'timestamp': '2025-09-10 02:46:11.399850', 'step': 9515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:11.453755', 'step': 9515, 'epoch': 2} {'type': 'loss', 'content': 0.0754275992512703, 'timestamp': '2025-09-10 02:46:11.460060', 'step': 9516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:11.512903', 'step': 9516, 'epoch': 2} {'type': 'loss', 'content': 0.0944773331284523, 'timestamp': '2025-09-10 02:46:11.515196', 'step': 9517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:11.568855', 'step': 9517, 'epoch': 2} {'type': 'loss', 'content': 0.1945962756872177, 'timestamp': '2025-09-10 02:46:11.571240', 'step': 9518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:11.624658', 'step': 9518, 'epoch': 2} {'type': 'loss', 'content': 0.09952913224697113, 'timestamp': '2025-09-10 02:46:11.626964', 'step': 9519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:11.680295', 'step': 9519, 'epoch': 2} {'type': 'loss', 'content': 0.11857165396213531, 'timestamp': '2025-09-10 02:46:11.686627', 'step': 9520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:11.740140', 'step': 9520, 'epoch': 2} {'type': 'loss', 'content': 0.11344026029109955, 'timestamp': '2025-09-10 02:46:11.744738', 'step': 9521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:11.800202', 'step': 9521, 'epoch': 2} {'type': 'loss', 'content': 0.10770086199045181, 'timestamp': '2025-09-10 02:46:11.802685', 'step': 9522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:11.856779', 'step': 9522, 'epoch': 2} {'type': 'loss', 'content': 0.1291571408510208, 'timestamp': '2025-09-10 02:46:11.859168', 'step': 9523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:11.913056', 'step': 9523, 'epoch': 2} {'type': 'loss', 'content': 0.1818545013666153, 'timestamp': '2025-09-10 02:46:11.919466', 'step': 9524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:11.972552', 'step': 9524, 'epoch': 2} {'type': 'loss', 'content': 0.1344059705734253, 'timestamp': '2025-09-10 02:46:11.974774', 'step': 9525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:12.030649', 'step': 9525, 'epoch': 2} {'type': 'loss', 'content': 0.11317111551761627, 'timestamp': '2025-09-10 02:46:12.033067', 'step': 9526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:12.086493', 'step': 9526, 'epoch': 2} {'type': 'loss', 'content': 0.14856888353824615, 'timestamp': '2025-09-10 02:46:12.088922', 'step': 9527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:12.142173', 'step': 9527, 'epoch': 2} {'type': 'loss', 'content': 0.11729662865400314, 'timestamp': '2025-09-10 02:46:12.148433', 'step': 9528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:12.201287', 'step': 9528, 'epoch': 2} {'type': 'loss', 'content': 0.1544087529182434, 'timestamp': '2025-09-10 02:46:12.203653', 'step': 9529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:12.257295', 'step': 9529, 'epoch': 2} {'type': 'loss', 'content': 0.11068131029605865, 'timestamp': '2025-09-10 02:46:12.259593', 'step': 9530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:12.312930', 'step': 9530, 'epoch': 2} {'type': 'loss', 'content': 0.19534000754356384, 'timestamp': '2025-09-10 02:46:12.316075', 'step': 9531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:12.372553', 'step': 9531, 'epoch': 2} {'type': 'loss', 'content': 0.1743791699409485, 'timestamp': '2025-09-10 02:46:12.378812', 'step': 9532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:12.431292', 'step': 9532, 'epoch': 2} {'type': 'loss', 'content': 0.128486767411232, 'timestamp': '2025-09-10 02:46:12.434061', 'step': 9533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:12.490152', 'step': 9533, 'epoch': 2} {'type': 'loss', 'content': 0.10281630605459213, 'timestamp': '2025-09-10 02:46:12.492576', 'step': 9534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:12.545951', 'step': 9534, 'epoch': 2} {'type': 'loss', 'content': 0.09142360091209412, 'timestamp': '2025-09-10 02:46:12.548720', 'step': 9535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:12.604210', 'step': 9535, 'epoch': 2} {'type': 'loss', 'content': 0.09327647089958191, 'timestamp': '2025-09-10 02:46:12.611174', 'step': 9536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:12.663880', 'step': 9536, 'epoch': 2} {'type': 'loss', 'content': 0.18928386270999908, 'timestamp': '2025-09-10 02:46:12.666459', 'step': 9537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:12.722079', 'step': 9537, 'epoch': 2} {'type': 'loss', 'content': 0.17034730315208435, 'timestamp': '2025-09-10 02:46:12.724597', 'step': 9538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:12.780392', 'step': 9538, 'epoch': 2} {'type': 'loss', 'content': 0.12222088128328323, 'timestamp': '2025-09-10 02:46:12.782868', 'step': 9539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:12.836814', 'step': 9539, 'epoch': 2} {'type': 'loss', 'content': 0.13197559118270874, 'timestamp': '2025-09-10 02:46:12.842999', 'step': 9540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:12.896763', 'step': 9540, 'epoch': 2} {'type': 'loss', 'content': 0.1117323562502861, 'timestamp': '2025-09-10 02:46:12.899084', 'step': 9541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:12.953256', 'step': 9541, 'epoch': 2} {'type': 'loss', 'content': 0.12469115108251572, 'timestamp': '2025-09-10 02:46:12.955592', 'step': 9542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:13.010650', 'step': 9542, 'epoch': 2} {'type': 'loss', 'content': 0.1383838951587677, 'timestamp': '2025-09-10 02:46:13.012951', 'step': 9543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:13.066658', 'step': 9543, 'epoch': 2} {'type': 'loss', 'content': 0.09526917338371277, 'timestamp': '2025-09-10 02:46:13.072916', 'step': 9544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:13.125978', 'step': 9544, 'epoch': 2} {'type': 'loss', 'content': 0.10671477764844894, 'timestamp': '2025-09-10 02:46:13.130852', 'step': 9545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:13.187667', 'step': 9545, 'epoch': 2} {'type': 'loss', 'content': 0.09125044196844101, 'timestamp': '2025-09-10 02:46:13.190102', 'step': 9546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:13.244015', 'step': 9546, 'epoch': 2} {'type': 'loss', 'content': 0.1645117998123169, 'timestamp': '2025-09-10 02:46:13.247659', 'step': 9547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:13.302018', 'step': 9547, 'epoch': 2} {'type': 'loss', 'content': 0.07508146017789841, 'timestamp': '2025-09-10 02:46:13.309251', 'step': 9548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:13.363278', 'step': 9548, 'epoch': 2} {'type': 'loss', 'content': 0.13586832582950592, 'timestamp': '2025-09-10 02:46:13.365565', 'step': 9549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:13.419833', 'step': 9549, 'epoch': 2} {'type': 'loss', 'content': 0.05550198629498482, 'timestamp': '2025-09-10 02:46:13.422225', 'step': 9550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:13.479019', 'step': 9550, 'epoch': 2} {'type': 'loss', 'content': 0.1710384339094162, 'timestamp': '2025-09-10 02:46:13.482333', 'step': 9551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:13.538128', 'step': 9551, 'epoch': 2} {'type': 'loss', 'content': 0.18277710676193237, 'timestamp': '2025-09-10 02:46:13.544654', 'step': 9552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:13.599280', 'step': 9552, 'epoch': 2} {'type': 'loss', 'content': 0.22647923231124878, 'timestamp': '2025-09-10 02:46:13.601688', 'step': 9553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:13.656580', 'step': 9553, 'epoch': 2} {'type': 'loss', 'content': 0.16213646531105042, 'timestamp': '2025-09-10 02:46:13.658821', 'step': 9554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:13.715710', 'step': 9554, 'epoch': 2} {'type': 'loss', 'content': 0.09069707244634628, 'timestamp': '2025-09-10 02:46:13.717961', 'step': 9555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:13.771895', 'step': 9555, 'epoch': 2} {'type': 'loss', 'content': 0.08853242546319962, 'timestamp': '2025-09-10 02:46:13.778210', 'step': 9556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:13.831126', 'step': 9556, 'epoch': 2} {'type': 'loss', 'content': 0.1488872915506363, 'timestamp': '2025-09-10 02:46:13.833282', 'step': 9557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:13.886637', 'step': 9557, 'epoch': 2} {'type': 'loss', 'content': 0.18183250725269318, 'timestamp': '2025-09-10 02:46:13.888936', 'step': 9558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:13.943350', 'step': 9558, 'epoch': 2} {'type': 'loss', 'content': 0.13627266883850098, 'timestamp': '2025-09-10 02:46:13.945828', 'step': 9559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:14.001052', 'step': 9559, 'epoch': 2} {'type': 'loss', 'content': 0.15155699849128723, 'timestamp': '2025-09-10 02:46:14.011556', 'step': 9560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:14.065096', 'step': 9560, 'epoch': 2} {'type': 'loss', 'content': 0.13967008888721466, 'timestamp': '2025-09-10 02:46:14.067403', 'step': 9561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:14.122460', 'step': 9561, 'epoch': 2} {'type': 'loss', 'content': 0.17896923422813416, 'timestamp': '2025-09-10 02:46:14.124732', 'step': 9562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:14.178328', 'step': 9562, 'epoch': 2} {'type': 'loss', 'content': 0.15744075179100037, 'timestamp': '2025-09-10 02:46:14.181930', 'step': 9563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:14.236709', 'step': 9563, 'epoch': 2} {'type': 'loss', 'content': 0.12893493473529816, 'timestamp': '2025-09-10 02:46:14.242907', 'step': 9564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:14.295921', 'step': 9564, 'epoch': 2} {'type': 'loss', 'content': 0.15538465976715088, 'timestamp': '2025-09-10 02:46:14.298376', 'step': 9565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:14.352084', 'step': 9565, 'epoch': 2} {'type': 'loss', 'content': 0.16930700838565826, 'timestamp': '2025-09-10 02:46:14.354632', 'step': 9566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:14.409014', 'step': 9566, 'epoch': 2} {'type': 'loss', 'content': 0.13505761325359344, 'timestamp': '2025-09-10 02:46:14.411230', 'step': 9567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:14.464877', 'step': 9567, 'epoch': 2} {'type': 'loss', 'content': 0.13184449076652527, 'timestamp': '2025-09-10 02:46:14.470871', 'step': 9568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:14.525435', 'step': 9568, 'epoch': 2} {'type': 'loss', 'content': 0.1422751098871231, 'timestamp': '2025-09-10 02:46:14.527730', 'step': 9569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:14.581433', 'step': 9569, 'epoch': 2} {'type': 'loss', 'content': 0.07472404837608337, 'timestamp': '2025-09-10 02:46:14.583779', 'step': 9570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:14.638296', 'step': 9570, 'epoch': 2} {'type': 'loss', 'content': 0.17564724385738373, 'timestamp': '2025-09-10 02:46:14.640584', 'step': 9571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:14.694228', 'step': 9571, 'epoch': 2} {'type': 'loss', 'content': 0.12675414979457855, 'timestamp': '2025-09-10 02:46:14.700420', 'step': 9572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:14.753182', 'step': 9572, 'epoch': 2} {'type': 'loss', 'content': 0.1136590912938118, 'timestamp': '2025-09-10 02:46:14.755539', 'step': 9573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:14.808997', 'step': 9573, 'epoch': 2} {'type': 'loss', 'content': 0.14144451916217804, 'timestamp': '2025-09-10 02:46:14.811512', 'step': 9574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:14.865128', 'step': 9574, 'epoch': 2} {'type': 'loss', 'content': 0.2090996503829956, 'timestamp': '2025-09-10 02:46:14.867478', 'step': 9575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:14.920762', 'step': 9575, 'epoch': 2} {'type': 'loss', 'content': 0.1722981482744217, 'timestamp': '2025-09-10 02:46:14.926609', 'step': 9576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:14.979987', 'step': 9576, 'epoch': 2} {'type': 'loss', 'content': 0.13765230774879456, 'timestamp': '2025-09-10 02:46:14.982300', 'step': 9577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:15.036341', 'step': 9577, 'epoch': 2} {'type': 'loss', 'content': 0.10665491968393326, 'timestamp': '2025-09-10 02:46:15.038746', 'step': 9578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:15.093935', 'step': 9578, 'epoch': 2} {'type': 'loss', 'content': 0.11777330189943314, 'timestamp': '2025-09-10 02:46:15.096248', 'step': 9579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:15.150055', 'step': 9579, 'epoch': 2} {'type': 'loss', 'content': 0.12400444597005844, 'timestamp': '2025-09-10 02:46:15.156324', 'step': 9580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:15.209732', 'step': 9580, 'epoch': 2} {'type': 'loss', 'content': 0.13637636601924896, 'timestamp': '2025-09-10 02:46:15.212238', 'step': 9581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:15.265312', 'step': 9581, 'epoch': 2} {'type': 'loss', 'content': 0.13364961743354797, 'timestamp': '2025-09-10 02:46:15.267621', 'step': 9582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:15.321203', 'step': 9582, 'epoch': 2} {'type': 'loss', 'content': 0.10969626158475876, 'timestamp': '2025-09-10 02:46:15.323796', 'step': 9583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:15.377999', 'step': 9583, 'epoch': 2} {'type': 'loss', 'content': 0.10850083082914352, 'timestamp': '2025-09-10 02:46:15.384227', 'step': 9584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:15.442381', 'step': 9584, 'epoch': 2} {'type': 'loss', 'content': 0.1405174285173416, 'timestamp': '2025-09-10 02:46:15.444705', 'step': 9585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:15.499292', 'step': 9585, 'epoch': 2} {'type': 'loss', 'content': 0.13771146535873413, 'timestamp': '2025-09-10 02:46:15.501622', 'step': 9586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:15.556841', 'step': 9586, 'epoch': 2} {'type': 'loss', 'content': 0.08615223318338394, 'timestamp': '2025-09-10 02:46:15.559088', 'step': 9587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:15.616062', 'step': 9587, 'epoch': 2} {'type': 'loss', 'content': 0.23792792856693268, 'timestamp': '2025-09-10 02:46:15.622432', 'step': 9588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:15.677057', 'step': 9588, 'epoch': 2} {'type': 'loss', 'content': 0.14046283066272736, 'timestamp': '2025-09-10 02:46:15.679456', 'step': 9589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:15.737519', 'step': 9589, 'epoch': 2} {'type': 'loss', 'content': 0.08186281472444534, 'timestamp': '2025-09-10 02:46:15.739832', 'step': 9590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:15.796248', 'step': 9590, 'epoch': 2} {'type': 'loss', 'content': 0.12621957063674927, 'timestamp': '2025-09-10 02:46:15.798733', 'step': 9591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:15.853600', 'step': 9591, 'epoch': 2} {'type': 'loss', 'content': 0.19673971831798553, 'timestamp': '2025-09-10 02:46:15.860240', 'step': 9592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:15.914465', 'step': 9592, 'epoch': 2} {'type': 'loss', 'content': 0.10939866304397583, 'timestamp': '2025-09-10 02:46:15.916789', 'step': 9593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:15.970319', 'step': 9593, 'epoch': 2} {'type': 'loss', 'content': 0.06469478458166122, 'timestamp': '2025-09-10 02:46:15.972742', 'step': 9594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:16.026979', 'step': 9594, 'epoch': 2} {'type': 'loss', 'content': 0.2768806219100952, 'timestamp': '2025-09-10 02:46:16.029570', 'step': 9595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:16.083409', 'step': 9595, 'epoch': 2} {'type': 'loss', 'content': 0.0670916736125946, 'timestamp': '2025-09-10 02:46:16.089761', 'step': 9596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:16.146721', 'step': 9596, 'epoch': 2} {'type': 'loss', 'content': 0.12326172739267349, 'timestamp': '2025-09-10 02:46:16.149049', 'step': 9597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:16.203713', 'step': 9597, 'epoch': 2} {'type': 'loss', 'content': 0.21101200580596924, 'timestamp': '2025-09-10 02:46:16.205985', 'step': 9598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:16.260200', 'step': 9598, 'epoch': 2} {'type': 'loss', 'content': 0.09341677278280258, 'timestamp': '2025-09-10 02:46:16.262357', 'step': 9599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:16.316287', 'step': 9599, 'epoch': 2} {'type': 'loss', 'content': 0.09204263240098953, 'timestamp': '2025-09-10 02:46:16.322394', 'step': 9600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:16.375000', 'step': 9600, 'epoch': 2} {'type': 'loss', 'content': 0.13476607203483582, 'timestamp': '2025-09-10 02:46:16.377163', 'step': 9601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:16.430580', 'step': 9601, 'epoch': 2} {'type': 'loss', 'content': 0.09831978380680084, 'timestamp': '2025-09-10 02:46:16.432747', 'step': 9602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:16.486217', 'step': 9602, 'epoch': 2} {'type': 'loss', 'content': 0.14028775691986084, 'timestamp': '2025-09-10 02:46:16.488359', 'step': 9603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:16.541755', 'step': 9603, 'epoch': 2} {'type': 'loss', 'content': 0.12643784284591675, 'timestamp': '2025-09-10 02:46:16.548065', 'step': 9604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:16.600908', 'step': 9604, 'epoch': 2} {'type': 'loss', 'content': 0.1160270944237709, 'timestamp': '2025-09-10 02:46:16.603296', 'step': 9605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:16.657332', 'step': 9605, 'epoch': 2} {'type': 'loss', 'content': 0.11873774230480194, 'timestamp': '2025-09-10 02:46:16.659746', 'step': 9606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:16.713882', 'step': 9606, 'epoch': 2} {'type': 'loss', 'content': 0.10792641341686249, 'timestamp': '2025-09-10 02:46:16.716369', 'step': 9607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:16.770230', 'step': 9607, 'epoch': 2} {'type': 'loss', 'content': 0.09801606088876724, 'timestamp': '2025-09-10 02:46:16.776601', 'step': 9608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:16.829803', 'step': 9608, 'epoch': 2} {'type': 'loss', 'content': 0.10791213810443878, 'timestamp': '2025-09-10 02:46:16.832205', 'step': 9609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:16.885714', 'step': 9609, 'epoch': 2} {'type': 'loss', 'content': 0.09027796238660812, 'timestamp': '2025-09-10 02:46:16.888438', 'step': 9610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:16.942612', 'step': 9610, 'epoch': 2} {'type': 'loss', 'content': 0.1677745282649994, 'timestamp': '2025-09-10 02:46:16.945065', 'step': 9611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:16.998409', 'step': 9611, 'epoch': 2} {'type': 'loss', 'content': 0.13033270835876465, 'timestamp': '2025-09-10 02:46:17.004639', 'step': 9612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:17.058680', 'step': 9612, 'epoch': 2} {'type': 'loss', 'content': 0.08317525684833527, 'timestamp': '2025-09-10 02:46:17.061016', 'step': 9613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:17.114757', 'step': 9613, 'epoch': 2} {'type': 'loss', 'content': 0.09878559410572052, 'timestamp': '2025-09-10 02:46:17.117102', 'step': 9614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:17.170773', 'step': 9614, 'epoch': 2} {'type': 'loss', 'content': 0.12634387612342834, 'timestamp': '2025-09-10 02:46:17.173193', 'step': 9615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:17.227272', 'step': 9615, 'epoch': 2} {'type': 'loss', 'content': 0.15981163084506989, 'timestamp': '2025-09-10 02:46:17.233469', 'step': 9616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:17.287222', 'step': 9616, 'epoch': 2} {'type': 'loss', 'content': 0.15104703605175018, 'timestamp': '2025-09-10 02:46:17.289496', 'step': 9617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:17.343564', 'step': 9617, 'epoch': 2} {'type': 'loss', 'content': 0.054631877690553665, 'timestamp': '2025-09-10 02:46:17.345869', 'step': 9618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:17.399653', 'step': 9618, 'epoch': 2} {'type': 'loss', 'content': 0.19730831682682037, 'timestamp': '2025-09-10 02:46:17.401980', 'step': 9619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:17.457784', 'step': 9619, 'epoch': 2} {'type': 'loss', 'content': 0.12821632623672485, 'timestamp': '2025-09-10 02:46:17.463981', 'step': 9620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:17.516769', 'step': 9620, 'epoch': 2} {'type': 'loss', 'content': 0.1087942123413086, 'timestamp': '2025-09-10 02:46:17.519190', 'step': 9621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:17.572459', 'step': 9621, 'epoch': 2} {'type': 'loss', 'content': 0.22189173102378845, 'timestamp': '2025-09-10 02:46:17.574766', 'step': 9622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:17.629653', 'step': 9622, 'epoch': 2} {'type': 'loss', 'content': 0.17327433824539185, 'timestamp': '2025-09-10 02:46:17.632061', 'step': 9623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:17.685705', 'step': 9623, 'epoch': 2} {'type': 'loss', 'content': 0.16948838531970978, 'timestamp': '2025-09-10 02:46:17.692180', 'step': 9624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:17.749327', 'step': 9624, 'epoch': 2} {'type': 'loss', 'content': 0.1337720900774002, 'timestamp': '2025-09-10 02:46:17.751605', 'step': 9625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:17.805141', 'step': 9625, 'epoch': 2} {'type': 'loss', 'content': 0.0977947935461998, 'timestamp': '2025-09-10 02:46:17.807430', 'step': 9626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:17.865320', 'step': 9626, 'epoch': 2} {'type': 'loss', 'content': 0.1434621810913086, 'timestamp': '2025-09-10 02:46:17.867584', 'step': 9627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:17.920556', 'step': 9627, 'epoch': 2} {'type': 'loss', 'content': 0.24686865508556366, 'timestamp': '2025-09-10 02:46:17.926631', 'step': 9628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:17.979999', 'step': 9628, 'epoch': 2} {'type': 'loss', 'content': 0.16494052112102509, 'timestamp': '2025-09-10 02:46:17.982466', 'step': 9629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:18.036426', 'step': 9629, 'epoch': 2} {'type': 'loss', 'content': 0.1390361338853836, 'timestamp': '2025-09-10 02:46:18.038797', 'step': 9630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:18.093077', 'step': 9630, 'epoch': 2} {'type': 'loss', 'content': 0.19693489372730255, 'timestamp': '2025-09-10 02:46:18.095396', 'step': 9631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:18.148871', 'step': 9631, 'epoch': 2} {'type': 'loss', 'content': 0.20665881037712097, 'timestamp': '2025-09-10 02:46:18.155134', 'step': 9632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:18.208111', 'step': 9632, 'epoch': 2} {'type': 'loss', 'content': 0.18831878900527954, 'timestamp': '2025-09-10 02:46:18.210446', 'step': 9633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:18.264739', 'step': 9633, 'epoch': 2} {'type': 'loss', 'content': 0.1608380675315857, 'timestamp': '2025-09-10 02:46:18.267066', 'step': 9634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:18.321409', 'step': 9634, 'epoch': 2} {'type': 'loss', 'content': 0.18299126625061035, 'timestamp': '2025-09-10 02:46:18.323690', 'step': 9635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:18.378683', 'step': 9635, 'epoch': 2} {'type': 'loss', 'content': 0.11624129861593246, 'timestamp': '2025-09-10 02:46:18.385135', 'step': 9636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:18.439060', 'step': 9636, 'epoch': 2} {'type': 'loss', 'content': 0.11887673288583755, 'timestamp': '2025-09-10 02:46:18.441566', 'step': 9637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:18.497159', 'step': 9637, 'epoch': 2} {'type': 'loss', 'content': 0.08802555501461029, 'timestamp': '2025-09-10 02:46:18.499718', 'step': 9638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:18.554874', 'step': 9638, 'epoch': 2} {'type': 'loss', 'content': 0.0886782854795456, 'timestamp': '2025-09-10 02:46:18.557362', 'step': 9639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:18.612087', 'step': 9639, 'epoch': 2} {'type': 'loss', 'content': 0.13170179724693298, 'timestamp': '2025-09-10 02:46:18.618608', 'step': 9640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:18.671754', 'step': 9640, 'epoch': 2} {'type': 'loss', 'content': 0.10753722488880157, 'timestamp': '2025-09-10 02:46:18.674058', 'step': 9641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:18.728048', 'step': 9641, 'epoch': 2} {'type': 'loss', 'content': 0.27522751688957214, 'timestamp': '2025-09-10 02:46:18.730195', 'step': 9642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:18.784988', 'step': 9642, 'epoch': 2} {'type': 'loss', 'content': 0.10078469663858414, 'timestamp': '2025-09-10 02:46:18.787337', 'step': 9643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:18.841067', 'step': 9643, 'epoch': 2} {'type': 'loss', 'content': 0.1560099720954895, 'timestamp': '2025-09-10 02:46:18.847234', 'step': 9644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:18.900391', 'step': 9644, 'epoch': 2} {'type': 'loss', 'content': 0.1888456791639328, 'timestamp': '2025-09-10 02:46:18.902459', 'step': 9645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:18.956072', 'step': 9645, 'epoch': 2} {'type': 'loss', 'content': 0.15897518396377563, 'timestamp': '2025-09-10 02:46:18.958425', 'step': 9646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:19.013301', 'step': 9646, 'epoch': 2} {'type': 'loss', 'content': 0.11964771151542664, 'timestamp': '2025-09-10 02:46:19.015529', 'step': 9647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:19.069932', 'step': 9647, 'epoch': 2} {'type': 'loss', 'content': 0.21339918673038483, 'timestamp': '2025-09-10 02:46:19.076300', 'step': 9648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:19.129232', 'step': 9648, 'epoch': 2} {'type': 'loss', 'content': 0.09173852205276489, 'timestamp': '2025-09-10 02:46:19.131594', 'step': 9649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:19.185248', 'step': 9649, 'epoch': 2} {'type': 'loss', 'content': 0.13147833943367004, 'timestamp': '2025-09-10 02:46:19.187615', 'step': 9650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:19.241468', 'step': 9650, 'epoch': 2} {'type': 'loss', 'content': 0.0888366624712944, 'timestamp': '2025-09-10 02:46:19.243718', 'step': 9651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:19.297654', 'step': 9651, 'epoch': 2} {'type': 'loss', 'content': 0.12045316398143768, 'timestamp': '2025-09-10 02:46:19.303906', 'step': 9652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:19.357922', 'step': 9652, 'epoch': 2} {'type': 'loss', 'content': 0.11707563698291779, 'timestamp': '2025-09-10 02:46:19.360449', 'step': 9653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:19.414628', 'step': 9653, 'epoch': 2} {'type': 'loss', 'content': 0.14250944554805756, 'timestamp': '2025-09-10 02:46:19.416937', 'step': 9654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:19.471669', 'step': 9654, 'epoch': 2} {'type': 'loss', 'content': 0.14290744066238403, 'timestamp': '2025-09-10 02:46:19.474015', 'step': 9655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:19.528922', 'step': 9655, 'epoch': 2} {'type': 'loss', 'content': 0.14533226191997528, 'timestamp': '2025-09-10 02:46:19.535212', 'step': 9656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:19.589813', 'step': 9656, 'epoch': 2} {'type': 'loss', 'content': 0.10622875392436981, 'timestamp': '2025-09-10 02:46:19.594633', 'step': 9657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:19.651742', 'step': 9657, 'epoch': 2} {'type': 'loss', 'content': 0.04882485419511795, 'timestamp': '2025-09-10 02:46:19.658186', 'step': 9658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:19.712919', 'step': 9658, 'epoch': 2} {'type': 'loss', 'content': 0.12389519065618515, 'timestamp': '2025-09-10 02:46:19.717229', 'step': 9659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:19.784286', 'step': 9659, 'epoch': 2} {'type': 'loss', 'content': 0.16611522436141968, 'timestamp': '2025-09-10 02:46:19.791481', 'step': 9660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:19.847212', 'step': 9660, 'epoch': 2} {'type': 'loss', 'content': 0.07518690824508667, 'timestamp': '2025-09-10 02:46:19.850094', 'step': 9661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:19.908264', 'step': 9661, 'epoch': 2} {'type': 'loss', 'content': 0.15141399204730988, 'timestamp': '2025-09-10 02:46:19.913947', 'step': 9662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:19.977337', 'step': 9662, 'epoch': 2} {'type': 'loss', 'content': 0.11384248733520508, 'timestamp': '2025-09-10 02:46:19.979760', 'step': 9663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:20.034204', 'step': 9663, 'epoch': 2} {'type': 'loss', 'content': 0.07207764685153961, 'timestamp': '2025-09-10 02:46:20.040404', 'step': 9664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:20.101190', 'step': 9664, 'epoch': 2} {'type': 'loss', 'content': 0.10007555037736893, 'timestamp': '2025-09-10 02:46:20.104007', 'step': 9665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:20.171631', 'step': 9665, 'epoch': 2} {'type': 'loss', 'content': 0.2172362059354782, 'timestamp': '2025-09-10 02:46:20.175655', 'step': 9666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:20.234678', 'step': 9666, 'epoch': 2} {'type': 'loss', 'content': 0.1460244208574295, 'timestamp': '2025-09-10 02:46:20.243880', 'step': 9667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:20.301620', 'step': 9667, 'epoch': 2} {'type': 'loss', 'content': 0.12471015751361847, 'timestamp': '2025-09-10 02:46:20.310925', 'step': 9668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:20.372999', 'step': 9668, 'epoch': 2} {'type': 'loss', 'content': 0.10045436024665833, 'timestamp': '2025-09-10 02:46:20.382886', 'step': 9669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:20.441507', 'step': 9669, 'epoch': 2} {'type': 'loss', 'content': 0.2001449018716812, 'timestamp': '2025-09-10 02:46:20.444665', 'step': 9670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:20.502587', 'step': 9670, 'epoch': 2} {'type': 'loss', 'content': 0.2766270339488983, 'timestamp': '2025-09-10 02:46:20.505925', 'step': 9671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:20.562532', 'step': 9671, 'epoch': 2} {'type': 'loss', 'content': 0.30916693806648254, 'timestamp': '2025-09-10 02:46:20.573411', 'step': 9672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:20.633877', 'step': 9672, 'epoch': 2} {'type': 'loss', 'content': 0.05897919088602066, 'timestamp': '2025-09-10 02:46:20.637414', 'step': 9673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:20.691512', 'step': 9673, 'epoch': 2} {'type': 'loss', 'content': 0.16901198029518127, 'timestamp': '2025-09-10 02:46:20.695323', 'step': 9674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:20.755439', 'step': 9674, 'epoch': 2} {'type': 'loss', 'content': 0.1583832949399948, 'timestamp': '2025-09-10 02:46:20.758666', 'step': 9675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:20.815059', 'step': 9675, 'epoch': 2} {'type': 'loss', 'content': 0.17570742964744568, 'timestamp': '2025-09-10 02:46:20.824069', 'step': 9676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:20.881286', 'step': 9676, 'epoch': 2} {'type': 'loss', 'content': 0.10549868643283844, 'timestamp': '2025-09-10 02:46:20.883855', 'step': 9677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:20.938797', 'step': 9677, 'epoch': 2} {'type': 'loss', 'content': 0.23185180127620697, 'timestamp': '2025-09-10 02:46:20.941532', 'step': 9678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:20.997590', 'step': 9678, 'epoch': 2} {'type': 'loss', 'content': 0.12844489514827728, 'timestamp': '2025-09-10 02:46:21.000960', 'step': 9679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:21.059622', 'step': 9679, 'epoch': 2} {'type': 'loss', 'content': 0.16608397662639618, 'timestamp': '2025-09-10 02:46:21.066913', 'step': 9680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:21.123331', 'step': 9680, 'epoch': 2} {'type': 'loss', 'content': 0.08285205066204071, 'timestamp': '2025-09-10 02:46:21.125708', 'step': 9681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:21.180284', 'step': 9681, 'epoch': 2} {'type': 'loss', 'content': 0.08527709543704987, 'timestamp': '2025-09-10 02:46:21.182799', 'step': 9682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:21.236644', 'step': 9682, 'epoch': 2} {'type': 'loss', 'content': 0.18584534525871277, 'timestamp': '2025-09-10 02:46:21.238934', 'step': 9683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:21.292898', 'step': 9683, 'epoch': 2} {'type': 'loss', 'content': 0.15400537848472595, 'timestamp': '2025-09-10 02:46:21.299163', 'step': 9684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:21.352811', 'step': 9684, 'epoch': 2} {'type': 'loss', 'content': 0.13777631521224976, 'timestamp': '2025-09-10 02:46:21.355074', 'step': 9685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:21.409368', 'step': 9685, 'epoch': 2} {'type': 'loss', 'content': 0.17600587010383606, 'timestamp': '2025-09-10 02:46:21.411526', 'step': 9686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:21.465240', 'step': 9686, 'epoch': 2} {'type': 'loss', 'content': 0.14801239967346191, 'timestamp': '2025-09-10 02:46:21.467557', 'step': 9687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:21.521600', 'step': 9687, 'epoch': 2} {'type': 'loss', 'content': 0.12028706818819046, 'timestamp': '2025-09-10 02:46:21.527848', 'step': 9688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:21.581174', 'step': 9688, 'epoch': 2} {'type': 'loss', 'content': 0.11062668263912201, 'timestamp': '2025-09-10 02:46:21.583622', 'step': 9689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:21.637644', 'step': 9689, 'epoch': 2} {'type': 'loss', 'content': 0.13907679915428162, 'timestamp': '2025-09-10 02:46:21.640006', 'step': 9690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:21.694869', 'step': 9690, 'epoch': 2} {'type': 'loss', 'content': 0.09293882548809052, 'timestamp': '2025-09-10 02:46:21.697062', 'step': 9691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:21.751734', 'step': 9691, 'epoch': 2} {'type': 'loss', 'content': 0.13207918405532837, 'timestamp': '2025-09-10 02:46:21.757747', 'step': 9692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:21.811685', 'step': 9692, 'epoch': 2} {'type': 'loss', 'content': 0.06721892952919006, 'timestamp': '2025-09-10 02:46:21.813559', 'step': 9693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:21.868247', 'step': 9693, 'epoch': 2} {'type': 'loss', 'content': 0.15882234275341034, 'timestamp': '2025-09-10 02:46:21.872122', 'step': 9694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:21.927792', 'step': 9694, 'epoch': 2} {'type': 'loss', 'content': 0.1470291167497635, 'timestamp': '2025-09-10 02:46:21.930233', 'step': 9695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:21.987201', 'step': 9695, 'epoch': 2} {'type': 'loss', 'content': 0.12958374619483948, 'timestamp': '2025-09-10 02:46:21.993325', 'step': 9696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:22.046979', 'step': 9696, 'epoch': 2} {'type': 'loss', 'content': 0.19376105070114136, 'timestamp': '2025-09-10 02:46:22.048966', 'step': 9697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:22.103150', 'step': 9697, 'epoch': 2} {'type': 'loss', 'content': 0.12192914634943008, 'timestamp': '2025-09-10 02:46:22.105120', 'step': 9698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:22.160312', 'step': 9698, 'epoch': 2} {'type': 'loss', 'content': 0.09778349101543427, 'timestamp': '2025-09-10 02:46:22.162265', 'step': 9699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:22.217270', 'step': 9699, 'epoch': 2} {'type': 'loss', 'content': 0.08684156835079193, 'timestamp': '2025-09-10 02:46:22.223237', 'step': 9700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:22.277480', 'step': 9700, 'epoch': 2} {'type': 'loss', 'content': 0.18293486535549164, 'timestamp': '2025-09-10 02:46:22.279525', 'step': 9701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:22.336658', 'step': 9701, 'epoch': 2} {'type': 'loss', 'content': 0.1625460386276245, 'timestamp': '2025-09-10 02:46:22.338911', 'step': 9702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:22.393590', 'step': 9702, 'epoch': 2} {'type': 'loss', 'content': 0.21329492330551147, 'timestamp': '2025-09-10 02:46:22.395768', 'step': 9703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:22.449837', 'step': 9703, 'epoch': 2} {'type': 'loss', 'content': 0.05881375074386597, 'timestamp': '2025-09-10 02:46:22.456149', 'step': 9704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:22.510058', 'step': 9704, 'epoch': 2} {'type': 'loss', 'content': 0.19640715420246124, 'timestamp': '2025-09-10 02:46:22.512333', 'step': 9705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:22.566615', 'step': 9705, 'epoch': 2} {'type': 'loss', 'content': 0.13641829788684845, 'timestamp': '2025-09-10 02:46:22.569091', 'step': 9706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:22.623649', 'step': 9706, 'epoch': 2} {'type': 'loss', 'content': 0.13541419804096222, 'timestamp': '2025-09-10 02:46:22.626034', 'step': 9707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:22.701870', 'step': 9707, 'epoch': 2} {'type': 'loss', 'content': 0.0996016189455986, 'timestamp': '2025-09-10 02:46:22.707969', 'step': 9708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:22.770240', 'step': 9708, 'epoch': 2} {'type': 'loss', 'content': 0.0856381207704544, 'timestamp': '2025-09-10 02:46:22.772199', 'step': 9709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:22.842238', 'step': 9709, 'epoch': 2} {'type': 'loss', 'content': 0.13672438263893127, 'timestamp': '2025-09-10 02:46:22.844292', 'step': 9710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:22.902306', 'step': 9710, 'epoch': 2} {'type': 'loss', 'content': 0.09498772025108337, 'timestamp': '2025-09-10 02:46:22.904865', 'step': 9711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:22.968551', 'step': 9711, 'epoch': 2} {'type': 'loss', 'content': 0.16086511313915253, 'timestamp': '2025-09-10 02:46:22.974804', 'step': 9712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:23.029332', 'step': 9712, 'epoch': 2} {'type': 'loss', 'content': 0.09638458490371704, 'timestamp': '2025-09-10 02:46:23.031534', 'step': 9713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:23.085849', 'step': 9713, 'epoch': 2} {'type': 'loss', 'content': 0.12183346599340439, 'timestamp': '2025-09-10 02:46:23.088062', 'step': 9714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:23.142677', 'step': 9714, 'epoch': 2} {'type': 'loss', 'content': 0.13723361492156982, 'timestamp': '2025-09-10 02:46:23.144905', 'step': 9715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:23.199529', 'step': 9715, 'epoch': 2} {'type': 'loss', 'content': 0.12872932851314545, 'timestamp': '2025-09-10 02:46:23.205693', 'step': 9716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:23.259954', 'step': 9716, 'epoch': 2} {'type': 'loss', 'content': 0.17044763267040253, 'timestamp': '2025-09-10 02:46:23.261859', 'step': 9717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:23.316553', 'step': 9717, 'epoch': 2} {'type': 'loss', 'content': 0.06842511147260666, 'timestamp': '2025-09-10 02:46:23.318581', 'step': 9718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:23.374368', 'step': 9718, 'epoch': 2} {'type': 'loss', 'content': 0.20362019538879395, 'timestamp': '2025-09-10 02:46:23.376559', 'step': 9719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:23.431645', 'step': 9719, 'epoch': 2} {'type': 'loss', 'content': 0.11040231585502625, 'timestamp': '2025-09-10 02:46:23.437725', 'step': 9720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:23.493521', 'step': 9720, 'epoch': 2} {'type': 'loss', 'content': 0.13487090170383453, 'timestamp': '2025-09-10 02:46:23.495396', 'step': 9721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:23.550846', 'step': 9721, 'epoch': 2} {'type': 'loss', 'content': 0.17077146470546722, 'timestamp': '2025-09-10 02:46:23.552945', 'step': 9722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:23.608247', 'step': 9722, 'epoch': 2} {'type': 'loss', 'content': 0.08531391620635986, 'timestamp': '2025-09-10 02:46:23.610720', 'step': 9723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:23.665573', 'step': 9723, 'epoch': 2} {'type': 'loss', 'content': 0.11793307960033417, 'timestamp': '2025-09-10 02:46:23.671706', 'step': 9724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:23.724843', 'step': 9724, 'epoch': 2} {'type': 'loss', 'content': 0.102993443608284, 'timestamp': '2025-09-10 02:46:23.726895', 'step': 9725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:23.781252', 'step': 9725, 'epoch': 2} {'type': 'loss', 'content': 0.15347938239574432, 'timestamp': '2025-09-10 02:46:23.783559', 'step': 9726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:23.839493', 'step': 9726, 'epoch': 2} {'type': 'loss', 'content': 0.15259520709514618, 'timestamp': '2025-09-10 02:46:23.841622', 'step': 9727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:23.895764', 'step': 9727, 'epoch': 2} {'type': 'loss', 'content': 0.06856934726238251, 'timestamp': '2025-09-10 02:46:23.902251', 'step': 9728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:23.956477', 'step': 9728, 'epoch': 2} {'type': 'loss', 'content': 0.24168014526367188, 'timestamp': '2025-09-10 02:46:23.958382', 'step': 9729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:24.013516', 'step': 9729, 'epoch': 2} {'type': 'loss', 'content': 0.08950918912887573, 'timestamp': '2025-09-10 02:46:24.015612', 'step': 9730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:24.069630', 'step': 9730, 'epoch': 2} {'type': 'loss', 'content': 0.1384722888469696, 'timestamp': '2025-09-10 02:46:24.071625', 'step': 9731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:24.125331', 'step': 9731, 'epoch': 2} {'type': 'loss', 'content': 0.10543760657310486, 'timestamp': '2025-09-10 02:46:24.131453', 'step': 9732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:24.185110', 'step': 9732, 'epoch': 2} {'type': 'loss', 'content': 0.1547362208366394, 'timestamp': '2025-09-10 02:46:24.187302', 'step': 9733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:24.242584', 'step': 9733, 'epoch': 2} {'type': 'loss', 'content': 0.08385258167982101, 'timestamp': '2025-09-10 02:46:24.244451', 'step': 9734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:24.298013', 'step': 9734, 'epoch': 2} {'type': 'loss', 'content': 0.15645749866962433, 'timestamp': '2025-09-10 02:46:24.299904', 'step': 9735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:24.355462', 'step': 9735, 'epoch': 2} {'type': 'loss', 'content': 0.19316168129444122, 'timestamp': '2025-09-10 02:46:24.361821', 'step': 9736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:24.415181', 'step': 9736, 'epoch': 2} {'type': 'loss', 'content': 0.1536749005317688, 'timestamp': '2025-09-10 02:46:24.417536', 'step': 9737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:24.472127', 'step': 9737, 'epoch': 2} {'type': 'loss', 'content': 0.15275011956691742, 'timestamp': '2025-09-10 02:46:24.474333', 'step': 9738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:24.528883', 'step': 9738, 'epoch': 2} {'type': 'loss', 'content': 0.1860988587141037, 'timestamp': '2025-09-10 02:46:24.531240', 'step': 9739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:24.585488', 'step': 9739, 'epoch': 2} {'type': 'loss', 'content': 0.1717333197593689, 'timestamp': '2025-09-10 02:46:24.591969', 'step': 9740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:24.645975', 'step': 9740, 'epoch': 2} {'type': 'loss', 'content': 0.18738220632076263, 'timestamp': '2025-09-10 02:46:24.647959', 'step': 9741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:24.701362', 'step': 9741, 'epoch': 2} {'type': 'loss', 'content': 0.14228838682174683, 'timestamp': '2025-09-10 02:46:24.703187', 'step': 9742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:24.757936', 'step': 9742, 'epoch': 2} {'type': 'loss', 'content': 0.16030801832675934, 'timestamp': '2025-09-10 02:46:24.759838', 'step': 9743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:24.813473', 'step': 9743, 'epoch': 2} {'type': 'loss', 'content': 0.1405041217803955, 'timestamp': '2025-09-10 02:46:24.819780', 'step': 9744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:24.874893', 'step': 9744, 'epoch': 2} {'type': 'loss', 'content': 0.1011291891336441, 'timestamp': '2025-09-10 02:46:24.877193', 'step': 9745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:24.931189', 'step': 9745, 'epoch': 2} {'type': 'loss', 'content': 0.06453629583120346, 'timestamp': '2025-09-10 02:46:24.933763', 'step': 9746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:24.995693', 'step': 9746, 'epoch': 2} {'type': 'loss', 'content': 0.07537724822759628, 'timestamp': '2025-09-10 02:46:24.998051', 'step': 9747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:25.052927', 'step': 9747, 'epoch': 2} {'type': 'loss', 'content': 0.1411653310060501, 'timestamp': '2025-09-10 02:46:25.059001', 'step': 9748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:25.113441', 'step': 9748, 'epoch': 2} {'type': 'loss', 'content': 0.18878212571144104, 'timestamp': '2025-09-10 02:46:25.115344', 'step': 9749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:25.169696', 'step': 9749, 'epoch': 2} {'type': 'loss', 'content': 0.16663677990436554, 'timestamp': '2025-09-10 02:46:25.172133', 'step': 9750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:25.226136', 'step': 9750, 'epoch': 2} {'type': 'loss', 'content': 0.20920325815677643, 'timestamp': '2025-09-10 02:46:25.227951', 'step': 9751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:25.281718', 'step': 9751, 'epoch': 2} {'type': 'loss', 'content': 0.11332761496305466, 'timestamp': '2025-09-10 02:46:25.287478', 'step': 9752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:25.342300', 'step': 9752, 'epoch': 2} {'type': 'loss', 'content': 0.16052307188510895, 'timestamp': '2025-09-10 02:46:25.344580', 'step': 9753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:25.399494', 'step': 9753, 'epoch': 2} {'type': 'loss', 'content': 0.11681238561868668, 'timestamp': '2025-09-10 02:46:25.402055', 'step': 9754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:25.459243', 'step': 9754, 'epoch': 2} {'type': 'loss', 'content': 0.2099275290966034, 'timestamp': '2025-09-10 02:46:25.461723', 'step': 9755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:25.516676', 'step': 9755, 'epoch': 2} {'type': 'loss', 'content': 0.09190208464860916, 'timestamp': '2025-09-10 02:46:25.523065', 'step': 9756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:25.577706', 'step': 9756, 'epoch': 2} {'type': 'loss', 'content': 0.1159648671746254, 'timestamp': '2025-09-10 02:46:25.580175', 'step': 9757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:25.635728', 'step': 9757, 'epoch': 2} {'type': 'loss', 'content': 0.07432863116264343, 'timestamp': '2025-09-10 02:46:25.638260', 'step': 9758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:25.693531', 'step': 9758, 'epoch': 2} {'type': 'loss', 'content': 0.12303707748651505, 'timestamp': '2025-09-10 02:46:25.695777', 'step': 9759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:25.751694', 'step': 9759, 'epoch': 2} {'type': 'loss', 'content': 0.13168764114379883, 'timestamp': '2025-09-10 02:46:25.758020', 'step': 9760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:25.811759', 'step': 9760, 'epoch': 2} {'type': 'loss', 'content': 0.11406192928552628, 'timestamp': '2025-09-10 02:46:25.814172', 'step': 9761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:25.868490', 'step': 9761, 'epoch': 2} {'type': 'loss', 'content': 0.1047556921839714, 'timestamp': '2025-09-10 02:46:25.872436', 'step': 9762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:25.931032', 'step': 9762, 'epoch': 2} {'type': 'loss', 'content': 0.2185119390487671, 'timestamp': '2025-09-10 02:46:25.933466', 'step': 9763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:25.988171', 'step': 9763, 'epoch': 2} {'type': 'loss', 'content': 0.1535279005765915, 'timestamp': '2025-09-10 02:46:25.996794', 'step': 9764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:26.052642', 'step': 9764, 'epoch': 2} {'type': 'loss', 'content': 0.14814598858356476, 'timestamp': '2025-09-10 02:46:26.054895', 'step': 9765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:26.111812', 'step': 9765, 'epoch': 2} {'type': 'loss', 'content': 0.136004239320755, 'timestamp': '2025-09-10 02:46:26.115970', 'step': 9766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:26.177223', 'step': 9766, 'epoch': 2} {'type': 'loss', 'content': 0.06445495784282684, 'timestamp': '2025-09-10 02:46:26.179815', 'step': 9767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:26.233966', 'step': 9767, 'epoch': 2} {'type': 'loss', 'content': 0.24456745386123657, 'timestamp': '2025-09-10 02:46:26.240232', 'step': 9768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:26.294088', 'step': 9768, 'epoch': 2} {'type': 'loss', 'content': 0.17356611788272858, 'timestamp': '2025-09-10 02:46:26.296507', 'step': 9769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:26.351796', 'step': 9769, 'epoch': 2} {'type': 'loss', 'content': 0.18843092024326324, 'timestamp': '2025-09-10 02:46:26.354237', 'step': 9770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:26.416754', 'step': 9770, 'epoch': 2} {'type': 'loss', 'content': 0.06251295655965805, 'timestamp': '2025-09-10 02:46:26.419112', 'step': 9771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:26.475059', 'step': 9771, 'epoch': 2} {'type': 'loss', 'content': 0.11028013378381729, 'timestamp': '2025-09-10 02:46:26.481834', 'step': 9772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:26.536675', 'step': 9772, 'epoch': 2} {'type': 'loss', 'content': 0.11212623864412308, 'timestamp': '2025-09-10 02:46:26.538740', 'step': 9773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:26.593487', 'step': 9773, 'epoch': 2} {'type': 'loss', 'content': 0.10007768124341965, 'timestamp': '2025-09-10 02:46:26.595811', 'step': 9774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:26.650305', 'step': 9774, 'epoch': 2} {'type': 'loss', 'content': 0.12375548481941223, 'timestamp': '2025-09-10 02:46:26.652604', 'step': 9775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:26.706508', 'step': 9775, 'epoch': 2} {'type': 'loss', 'content': 0.0902935117483139, 'timestamp': '2025-09-10 02:46:26.713002', 'step': 9776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:26.766891', 'step': 9776, 'epoch': 2} {'type': 'loss', 'content': 0.07587442547082901, 'timestamp': '2025-09-10 02:46:26.769292', 'step': 9777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:26.824115', 'step': 9777, 'epoch': 2} {'type': 'loss', 'content': 0.0694795474410057, 'timestamp': '2025-09-10 02:46:26.826467', 'step': 9778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:26.881651', 'step': 9778, 'epoch': 2} {'type': 'loss', 'content': 0.1699819564819336, 'timestamp': '2025-09-10 02:46:26.883907', 'step': 9779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:26.938863', 'step': 9779, 'epoch': 2} {'type': 'loss', 'content': 0.08682210743427277, 'timestamp': '2025-09-10 02:46:26.945109', 'step': 9780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:26.999636', 'step': 9780, 'epoch': 2} {'type': 'loss', 'content': 0.20259375870227814, 'timestamp': '2025-09-10 02:46:27.002046', 'step': 9781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:27.057028', 'step': 9781, 'epoch': 2} {'type': 'loss', 'content': 0.1136913150548935, 'timestamp': '2025-09-10 02:46:27.059687', 'step': 9782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:27.114839', 'step': 9782, 'epoch': 2} {'type': 'loss', 'content': 0.11283764243125916, 'timestamp': '2025-09-10 02:46:27.117228', 'step': 9783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:27.171098', 'step': 9783, 'epoch': 2} {'type': 'loss', 'content': 0.1915852576494217, 'timestamp': '2025-09-10 02:46:27.177484', 'step': 9784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:27.231459', 'step': 9784, 'epoch': 2} {'type': 'loss', 'content': 0.228669211268425, 'timestamp': '2025-09-10 02:46:27.233564', 'step': 9785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:27.287246', 'step': 9785, 'epoch': 2} {'type': 'loss', 'content': 0.09382367134094238, 'timestamp': '2025-09-10 02:46:27.289534', 'step': 9786, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:46:40.120079', 'step': 9786, 'epoch': 2} {'type': 'pplx', 'content': 12018.986734104079, 'timestamp': '2025-09-10 02:46:40.123140', 'step': 9786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:40.177529', 'step': 9786, 'epoch': 2} {'type': 'loss', 'content': 0.12163931876420975, 'timestamp': '2025-09-10 02:46:40.179497', 'step': 9787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:40.233333', 'step': 9787, 'epoch': 2} {'type': 'loss', 'content': 0.091029092669487, 'timestamp': '2025-09-10 02:46:40.239414', 'step': 9788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:40.292126', 'step': 9788, 'epoch': 2} {'type': 'loss', 'content': 0.13181984424591064, 'timestamp': '2025-09-10 02:46:40.294351', 'step': 9789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:40.347201', 'step': 9789, 'epoch': 2} {'type': 'loss', 'content': 0.07704601436853409, 'timestamp': '2025-09-10 02:46:40.349252', 'step': 9790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:40.402961', 'step': 9790, 'epoch': 2} {'type': 'loss', 'content': 0.1164361760020256, 'timestamp': '2025-09-10 02:46:40.405160', 'step': 9791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:40.458452', 'step': 9791, 'epoch': 2} {'type': 'loss', 'content': 0.21363046765327454, 'timestamp': '2025-09-10 02:46:40.464429', 'step': 9792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:40.517029', 'step': 9792, 'epoch': 2} {'type': 'loss', 'content': 0.09498053789138794, 'timestamp': '2025-09-10 02:46:40.519064', 'step': 9793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:40.572298', 'step': 9793, 'epoch': 2} {'type': 'loss', 'content': 0.1631835401058197, 'timestamp': '2025-09-10 02:46:40.574321', 'step': 9794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:40.628080', 'step': 9794, 'epoch': 2} {'type': 'loss', 'content': 0.11490566283464432, 'timestamp': '2025-09-10 02:46:40.630076', 'step': 9795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:40.683270', 'step': 9795, 'epoch': 2} {'type': 'loss', 'content': 0.131472647190094, 'timestamp': '2025-09-10 02:46:40.689032', 'step': 9796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:40.742207', 'step': 9796, 'epoch': 2} {'type': 'loss', 'content': 0.1175539493560791, 'timestamp': '2025-09-10 02:46:40.744289', 'step': 9797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:40.797226', 'step': 9797, 'epoch': 2} {'type': 'loss', 'content': 0.07941305637359619, 'timestamp': '2025-09-10 02:46:40.799190', 'step': 9798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:40.852608', 'step': 9798, 'epoch': 2} {'type': 'loss', 'content': 0.08909226953983307, 'timestamp': '2025-09-10 02:46:40.854576', 'step': 9799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:40.908057', 'step': 9799, 'epoch': 2} {'type': 'loss', 'content': 0.16731584072113037, 'timestamp': '2025-09-10 02:46:40.913880', 'step': 9800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:40.967501', 'step': 9800, 'epoch': 2} {'type': 'loss', 'content': 0.14560265839099884, 'timestamp': '2025-09-10 02:46:40.969531', 'step': 9801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:41.022350', 'step': 9801, 'epoch': 2} {'type': 'loss', 'content': 0.11377358436584473, 'timestamp': '2025-09-10 02:46:41.024362', 'step': 9802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:41.077837', 'step': 9802, 'epoch': 2} {'type': 'loss', 'content': 0.15185485780239105, 'timestamp': '2025-09-10 02:46:41.079891', 'step': 9803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:41.133472', 'step': 9803, 'epoch': 2} {'type': 'loss', 'content': 0.11962432414293289, 'timestamp': '2025-09-10 02:46:41.139210', 'step': 9804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:41.192847', 'step': 9804, 'epoch': 2} {'type': 'loss', 'content': 0.15460477769374847, 'timestamp': '2025-09-10 02:46:41.194935', 'step': 9805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:41.247691', 'step': 9805, 'epoch': 2} {'type': 'loss', 'content': 0.10369475185871124, 'timestamp': '2025-09-10 02:46:41.249722', 'step': 9806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:41.303015', 'step': 9806, 'epoch': 2} {'type': 'loss', 'content': 0.09133420884609222, 'timestamp': '2025-09-10 02:46:41.305007', 'step': 9807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:41.357543', 'step': 9807, 'epoch': 2} {'type': 'loss', 'content': 0.10538630932569504, 'timestamp': '2025-09-10 02:46:41.363287', 'step': 9808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:41.415633', 'step': 9808, 'epoch': 2} {'type': 'loss', 'content': 0.15890003740787506, 'timestamp': '2025-09-10 02:46:41.417743', 'step': 9809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:41.471747', 'step': 9809, 'epoch': 2} {'type': 'loss', 'content': 0.10567105561494827, 'timestamp': '2025-09-10 02:46:41.473754', 'step': 9810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:41.528005', 'step': 9810, 'epoch': 2} {'type': 'loss', 'content': 0.1928149163722992, 'timestamp': '2025-09-10 02:46:41.530011', 'step': 9811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:41.583517', 'step': 9811, 'epoch': 2} {'type': 'loss', 'content': 0.05677761137485504, 'timestamp': '2025-09-10 02:46:41.589333', 'step': 9812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:41.641809', 'step': 9812, 'epoch': 2} {'type': 'loss', 'content': 0.09207721054553986, 'timestamp': '2025-09-10 02:46:41.643818', 'step': 9813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:41.696886', 'step': 9813, 'epoch': 2} {'type': 'loss', 'content': 0.06216011568903923, 'timestamp': '2025-09-10 02:46:41.698916', 'step': 9814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:41.752030', 'step': 9814, 'epoch': 2} {'type': 'loss', 'content': 0.11985402554273605, 'timestamp': '2025-09-10 02:46:41.754049', 'step': 9815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:41.807047', 'step': 9815, 'epoch': 2} {'type': 'loss', 'content': 0.12465875595808029, 'timestamp': '2025-09-10 02:46:41.812827', 'step': 9816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:41.866167', 'step': 9816, 'epoch': 2} {'type': 'loss', 'content': 0.1230681836605072, 'timestamp': '2025-09-10 02:46:41.868133', 'step': 9817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:41.920999', 'step': 9817, 'epoch': 2} {'type': 'loss', 'content': 0.09855637699365616, 'timestamp': '2025-09-10 02:46:41.923135', 'step': 9818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:41.976558', 'step': 9818, 'epoch': 2} {'type': 'loss', 'content': 0.12572842836380005, 'timestamp': '2025-09-10 02:46:41.978608', 'step': 9819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:42.033340', 'step': 9819, 'epoch': 2} {'type': 'loss', 'content': 0.1508091390132904, 'timestamp': '2025-09-10 02:46:42.039542', 'step': 9820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:42.093200', 'step': 9820, 'epoch': 2} {'type': 'loss', 'content': 0.1491490602493286, 'timestamp': '2025-09-10 02:46:42.095254', 'step': 9821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:42.148171', 'step': 9821, 'epoch': 2} {'type': 'loss', 'content': 0.1635192185640335, 'timestamp': '2025-09-10 02:46:42.150212', 'step': 9822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:42.203025', 'step': 9822, 'epoch': 2} {'type': 'loss', 'content': 0.1440592110157013, 'timestamp': '2025-09-10 02:46:42.205052', 'step': 9823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:42.258690', 'step': 9823, 'epoch': 2} {'type': 'loss', 'content': 0.33996760845184326, 'timestamp': '2025-09-10 02:46:42.264577', 'step': 9824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:42.317710', 'step': 9824, 'epoch': 2} {'type': 'loss', 'content': 0.1755085289478302, 'timestamp': '2025-09-10 02:46:42.319696', 'step': 9825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:42.373055', 'step': 9825, 'epoch': 2} {'type': 'loss', 'content': 0.21351107954978943, 'timestamp': '2025-09-10 02:46:42.375032', 'step': 9826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:42.428036', 'step': 9826, 'epoch': 2} {'type': 'loss', 'content': 0.1606931984424591, 'timestamp': '2025-09-10 02:46:42.430126', 'step': 9827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:42.483747', 'step': 9827, 'epoch': 2} {'type': 'loss', 'content': 0.11060958355665207, 'timestamp': '2025-09-10 02:46:42.489483', 'step': 9828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:42.542538', 'step': 9828, 'epoch': 2} {'type': 'loss', 'content': 0.1712804138660431, 'timestamp': '2025-09-10 02:46:42.544543', 'step': 9829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:42.598422', 'step': 9829, 'epoch': 2} {'type': 'loss', 'content': 0.08395211398601532, 'timestamp': '2025-09-10 02:46:42.601804', 'step': 9830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:42.657225', 'step': 9830, 'epoch': 2} {'type': 'loss', 'content': 0.07500435411930084, 'timestamp': '2025-09-10 02:46:42.659216', 'step': 9831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:42.714021', 'step': 9831, 'epoch': 2} {'type': 'loss', 'content': 0.0775122195482254, 'timestamp': '2025-09-10 02:46:42.720024', 'step': 9832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:42.773903', 'step': 9832, 'epoch': 2} {'type': 'loss', 'content': 0.1511448621749878, 'timestamp': '2025-09-10 02:46:42.775898', 'step': 9833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:42.831909', 'step': 9833, 'epoch': 2} {'type': 'loss', 'content': 0.1502372920513153, 'timestamp': '2025-09-10 02:46:42.834071', 'step': 9834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:42.887651', 'step': 9834, 'epoch': 2} {'type': 'loss', 'content': 0.10818112641572952, 'timestamp': '2025-09-10 02:46:42.889924', 'step': 9835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:42.943061', 'step': 9835, 'epoch': 2} {'type': 'loss', 'content': 0.17140084505081177, 'timestamp': '2025-09-10 02:46:42.948831', 'step': 9836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:43.001614', 'step': 9836, 'epoch': 2} {'type': 'loss', 'content': 0.23084641993045807, 'timestamp': '2025-09-10 02:46:43.003641', 'step': 9837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:43.056345', 'step': 9837, 'epoch': 2} {'type': 'loss', 'content': 0.16953197121620178, 'timestamp': '2025-09-10 02:46:43.060356', 'step': 9838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:43.120216', 'step': 9838, 'epoch': 2} {'type': 'loss', 'content': 0.11903499066829681, 'timestamp': '2025-09-10 02:46:43.122458', 'step': 9839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:43.175789', 'step': 9839, 'epoch': 2} {'type': 'loss', 'content': 0.0826360359787941, 'timestamp': '2025-09-10 02:46:43.181753', 'step': 9840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:43.235062', 'step': 9840, 'epoch': 2} {'type': 'loss', 'content': 0.10405494272708893, 'timestamp': '2025-09-10 02:46:43.236965', 'step': 9841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:43.290432', 'step': 9841, 'epoch': 2} {'type': 'loss', 'content': 0.18397080898284912, 'timestamp': '2025-09-10 02:46:43.294292', 'step': 9842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:43.349035', 'step': 9842, 'epoch': 2} {'type': 'loss', 'content': 0.18190494179725647, 'timestamp': '2025-09-10 02:46:43.351069', 'step': 9843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:43.403991', 'step': 9843, 'epoch': 2} {'type': 'loss', 'content': 0.2178119271993637, 'timestamp': '2025-09-10 02:46:43.409798', 'step': 9844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:43.461974', 'step': 9844, 'epoch': 2} {'type': 'loss', 'content': 0.14898446202278137, 'timestamp': '2025-09-10 02:46:43.463980', 'step': 9845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:43.519444', 'step': 9845, 'epoch': 2} {'type': 'loss', 'content': 0.15229904651641846, 'timestamp': '2025-09-10 02:46:43.521399', 'step': 9846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:43.574866', 'step': 9846, 'epoch': 2} {'type': 'loss', 'content': 0.142337366938591, 'timestamp': '2025-09-10 02:46:43.576800', 'step': 9847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:43.630537', 'step': 9847, 'epoch': 2} {'type': 'loss', 'content': 0.13285702466964722, 'timestamp': '2025-09-10 02:46:43.636592', 'step': 9848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:43.691339', 'step': 9848, 'epoch': 2} {'type': 'loss', 'content': 0.1432163417339325, 'timestamp': '2025-09-10 02:46:43.693405', 'step': 9849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:43.746800', 'step': 9849, 'epoch': 2} {'type': 'loss', 'content': 0.16401565074920654, 'timestamp': '2025-09-10 02:46:43.748928', 'step': 9850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:43.802694', 'step': 9850, 'epoch': 2} {'type': 'loss', 'content': 0.21788537502288818, 'timestamp': '2025-09-10 02:46:43.804712', 'step': 9851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:43.858071', 'step': 9851, 'epoch': 2} {'type': 'loss', 'content': 0.17492841184139252, 'timestamp': '2025-09-10 02:46:43.863994', 'step': 9852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:43.916336', 'step': 9852, 'epoch': 2} {'type': 'loss', 'content': 0.21393916010856628, 'timestamp': '2025-09-10 02:46:43.918304', 'step': 9853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:43.971681', 'step': 9853, 'epoch': 2} {'type': 'loss', 'content': 0.09388543665409088, 'timestamp': '2025-09-10 02:46:43.973925', 'step': 9854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:44.028376', 'step': 9854, 'epoch': 2} {'type': 'loss', 'content': 0.16653797030448914, 'timestamp': '2025-09-10 02:46:44.030579', 'step': 9855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:44.084881', 'step': 9855, 'epoch': 2} {'type': 'loss', 'content': 0.1761520802974701, 'timestamp': '2025-09-10 02:46:44.090705', 'step': 9856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:44.143570', 'step': 9856, 'epoch': 2} {'type': 'loss', 'content': 0.06635987013578415, 'timestamp': '2025-09-10 02:46:44.145604', 'step': 9857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:44.198947', 'step': 9857, 'epoch': 2} {'type': 'loss', 'content': 0.0924265906214714, 'timestamp': '2025-09-10 02:46:44.201122', 'step': 9858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:44.254956', 'step': 9858, 'epoch': 2} {'type': 'loss', 'content': 0.19064724445343018, 'timestamp': '2025-09-10 02:46:44.257077', 'step': 9859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:44.313006', 'step': 9859, 'epoch': 2} {'type': 'loss', 'content': 0.08576449751853943, 'timestamp': '2025-09-10 02:46:44.319016', 'step': 9860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:44.372300', 'step': 9860, 'epoch': 2} {'type': 'loss', 'content': 0.14491558074951172, 'timestamp': '2025-09-10 02:46:44.374482', 'step': 9861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:44.428385', 'step': 9861, 'epoch': 2} {'type': 'loss', 'content': 0.09412969648838043, 'timestamp': '2025-09-10 02:46:44.430542', 'step': 9862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:44.486754', 'step': 9862, 'epoch': 2} {'type': 'loss', 'content': 0.13150237500667572, 'timestamp': '2025-09-10 02:46:44.489095', 'step': 9863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:44.544715', 'step': 9863, 'epoch': 2} {'type': 'loss', 'content': 0.11117120087146759, 'timestamp': '2025-09-10 02:46:44.550988', 'step': 9864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:44.605237', 'step': 9864, 'epoch': 2} {'type': 'loss', 'content': 0.08247421681880951, 'timestamp': '2025-09-10 02:46:44.609080', 'step': 9865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:44.663997', 'step': 9865, 'epoch': 2} {'type': 'loss', 'content': 0.12019173055887222, 'timestamp': '2025-09-10 02:46:44.666019', 'step': 9866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:44.727623', 'step': 9866, 'epoch': 2} {'type': 'loss', 'content': 0.13203343749046326, 'timestamp': '2025-09-10 02:46:44.731211', 'step': 9867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:44.785319', 'step': 9867, 'epoch': 2} {'type': 'loss', 'content': 0.07137124985456467, 'timestamp': '2025-09-10 02:46:44.793875', 'step': 9868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:44.846654', 'step': 9868, 'epoch': 2} {'type': 'loss', 'content': 0.16827388107776642, 'timestamp': '2025-09-10 02:46:44.848613', 'step': 9869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:44.901589', 'step': 9869, 'epoch': 2} {'type': 'loss', 'content': 0.1331695020198822, 'timestamp': '2025-09-10 02:46:44.903597', 'step': 9870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:44.956988', 'step': 9870, 'epoch': 2} {'type': 'loss', 'content': 0.2702828049659729, 'timestamp': '2025-09-10 02:46:44.960074', 'step': 9871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:45.015788', 'step': 9871, 'epoch': 2} {'type': 'loss', 'content': 0.20957545936107635, 'timestamp': '2025-09-10 02:46:45.021436', 'step': 9872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:45.075168', 'step': 9872, 'epoch': 2} {'type': 'loss', 'content': 0.12306617945432663, 'timestamp': '2025-09-10 02:46:45.078213', 'step': 9873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:45.131701', 'step': 9873, 'epoch': 2} {'type': 'loss', 'content': 0.12517431378364563, 'timestamp': '2025-09-10 02:46:45.133406', 'step': 9874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:45.194747', 'step': 9874, 'epoch': 2} {'type': 'loss', 'content': 0.1674187183380127, 'timestamp': '2025-09-10 02:46:45.203031', 'step': 9875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:45.260171', 'step': 9875, 'epoch': 2} {'type': 'loss', 'content': 0.20421552658081055, 'timestamp': '2025-09-10 02:46:45.265616', 'step': 9876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:45.328265', 'step': 9876, 'epoch': 2} {'type': 'loss', 'content': 0.13751529157161713, 'timestamp': '2025-09-10 02:46:45.329924', 'step': 9877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:45.392737', 'step': 9877, 'epoch': 2} {'type': 'loss', 'content': 0.2192617654800415, 'timestamp': '2025-09-10 02:46:45.395783', 'step': 9878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:45.450709', 'step': 9878, 'epoch': 2} {'type': 'loss', 'content': 0.14047583937644958, 'timestamp': '2025-09-10 02:46:45.452526', 'step': 9879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:45.506840', 'step': 9879, 'epoch': 2} {'type': 'loss', 'content': 0.10566713660955429, 'timestamp': '2025-09-10 02:46:45.512285', 'step': 9880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:45.566195', 'step': 9880, 'epoch': 2} {'type': 'loss', 'content': 0.14657771587371826, 'timestamp': '2025-09-10 02:46:45.568014', 'step': 9881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:45.622035', 'step': 9881, 'epoch': 2} {'type': 'loss', 'content': 0.1248101219534874, 'timestamp': '2025-09-10 02:46:45.623715', 'step': 9882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:45.678746', 'step': 9882, 'epoch': 2} {'type': 'loss', 'content': 0.15739719569683075, 'timestamp': '2025-09-10 02:46:45.680454', 'step': 9883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:45.735609', 'step': 9883, 'epoch': 2} {'type': 'loss', 'content': 0.09746074676513672, 'timestamp': '2025-09-10 02:46:45.741077', 'step': 9884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:45.795738', 'step': 9884, 'epoch': 2} {'type': 'loss', 'content': 0.15196260809898376, 'timestamp': '2025-09-10 02:46:45.797390', 'step': 9885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:45.851500', 'step': 9885, 'epoch': 2} {'type': 'loss', 'content': 0.1629977971315384, 'timestamp': '2025-09-10 02:46:45.853272', 'step': 9886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:45.905990', 'step': 9886, 'epoch': 2} {'type': 'loss', 'content': 0.09267833083868027, 'timestamp': '2025-09-10 02:46:45.907741', 'step': 9887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:45.961854', 'step': 9887, 'epoch': 2} {'type': 'loss', 'content': 0.2083691954612732, 'timestamp': '2025-09-10 02:46:45.967519', 'step': 9888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:46.021289', 'step': 9888, 'epoch': 2} {'type': 'loss', 'content': 0.12643365561962128, 'timestamp': '2025-09-10 02:46:46.023066', 'step': 9889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:46.077619', 'step': 9889, 'epoch': 2} {'type': 'loss', 'content': 0.0926247090101242, 'timestamp': '2025-09-10 02:46:46.079777', 'step': 9890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:46.135741', 'step': 9890, 'epoch': 2} {'type': 'loss', 'content': 0.1599922478199005, 'timestamp': '2025-09-10 02:46:46.137425', 'step': 9891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:46.191638', 'step': 9891, 'epoch': 2} {'type': 'loss', 'content': 0.12706664204597473, 'timestamp': '2025-09-10 02:46:46.197170', 'step': 9892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:46.251317', 'step': 9892, 'epoch': 2} {'type': 'loss', 'content': 0.11733295768499374, 'timestamp': '2025-09-10 02:46:46.253075', 'step': 9893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:46.307498', 'step': 9893, 'epoch': 2} {'type': 'loss', 'content': 0.10063464194536209, 'timestamp': '2025-09-10 02:46:46.309158', 'step': 9894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:46.362643', 'step': 9894, 'epoch': 2} {'type': 'loss', 'content': 0.22933617234230042, 'timestamp': '2025-09-10 02:46:46.364684', 'step': 9895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:46.417371', 'step': 9895, 'epoch': 2} {'type': 'loss', 'content': 0.09603294730186462, 'timestamp': '2025-09-10 02:46:46.423163', 'step': 9896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:46.476267', 'step': 9896, 'epoch': 2} {'type': 'loss', 'content': 0.09994526207447052, 'timestamp': '2025-09-10 02:46:46.478305', 'step': 9897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:46.532607', 'step': 9897, 'epoch': 2} {'type': 'loss', 'content': 0.12933531403541565, 'timestamp': '2025-09-10 02:46:46.534332', 'step': 9898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:46.589719', 'step': 9898, 'epoch': 2} {'type': 'loss', 'content': 0.13431833684444427, 'timestamp': '2025-09-10 02:46:46.591459', 'step': 9899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:46.644835', 'step': 9899, 'epoch': 2} {'type': 'loss', 'content': 0.13938891887664795, 'timestamp': '2025-09-10 02:46:46.650509', 'step': 9900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:46.703952', 'step': 9900, 'epoch': 2} {'type': 'loss', 'content': 0.19471119344234467, 'timestamp': '2025-09-10 02:46:46.705974', 'step': 9901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:46.759065', 'step': 9901, 'epoch': 2} {'type': 'loss', 'content': 0.1703052669763565, 'timestamp': '2025-09-10 02:46:46.761029', 'step': 9902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:46.815547', 'step': 9902, 'epoch': 2} {'type': 'loss', 'content': 0.13840937614440918, 'timestamp': '2025-09-10 02:46:46.817538', 'step': 9903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:46.871399', 'step': 9903, 'epoch': 2} {'type': 'loss', 'content': 0.20857740938663483, 'timestamp': '2025-09-10 02:46:46.877184', 'step': 9904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:46.929389', 'step': 9904, 'epoch': 2} {'type': 'loss', 'content': 0.017994899302721024, 'timestamp': '2025-09-10 02:46:46.931398', 'step': 9905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:46.984660', 'step': 9905, 'epoch': 2} {'type': 'loss', 'content': 0.1483525186777115, 'timestamp': '2025-09-10 02:46:46.986731', 'step': 9906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:47.039857', 'step': 9906, 'epoch': 2} {'type': 'loss', 'content': 0.12336805462837219, 'timestamp': '2025-09-10 02:46:47.041961', 'step': 9907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:47.095173', 'step': 9907, 'epoch': 2} {'type': 'loss', 'content': 0.14452537894248962, 'timestamp': '2025-09-10 02:46:47.101060', 'step': 9908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:47.153164', 'step': 9908, 'epoch': 2} {'type': 'loss', 'content': 0.10342224687337875, 'timestamp': '2025-09-10 02:46:47.155331', 'step': 9909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:47.208662', 'step': 9909, 'epoch': 2} {'type': 'loss', 'content': 0.12385948747396469, 'timestamp': '2025-09-10 02:46:47.210719', 'step': 9910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:47.263326', 'step': 9910, 'epoch': 2} {'type': 'loss', 'content': 0.08191332221031189, 'timestamp': '2025-09-10 02:46:47.265306', 'step': 9911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:47.320082', 'step': 9911, 'epoch': 2} {'type': 'loss', 'content': 0.18524591624736786, 'timestamp': '2025-09-10 02:46:47.325813', 'step': 9912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:47.379942', 'step': 9912, 'epoch': 2} {'type': 'loss', 'content': 0.12488863617181778, 'timestamp': '2025-09-10 02:46:47.381949', 'step': 9913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:47.436034', 'step': 9913, 'epoch': 2} {'type': 'loss', 'content': 0.07614533603191376, 'timestamp': '2025-09-10 02:46:47.437979', 'step': 9914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:47.492119', 'step': 9914, 'epoch': 2} {'type': 'loss', 'content': 0.12892615795135498, 'timestamp': '2025-09-10 02:46:47.494080', 'step': 9915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:47.547436', 'step': 9915, 'epoch': 2} {'type': 'loss', 'content': 0.14800646901130676, 'timestamp': '2025-09-10 02:46:47.553309', 'step': 9916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:47.605725', 'step': 9916, 'epoch': 2} {'type': 'loss', 'content': 0.12094806134700775, 'timestamp': '2025-09-10 02:46:47.607897', 'step': 9917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:47.660676', 'step': 9917, 'epoch': 2} {'type': 'loss', 'content': 0.10824790596961975, 'timestamp': '2025-09-10 02:46:47.662668', 'step': 9918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:47.715926', 'step': 9918, 'epoch': 2} {'type': 'loss', 'content': 0.14519472420215607, 'timestamp': '2025-09-10 02:46:47.717937', 'step': 9919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:47.771427', 'step': 9919, 'epoch': 2} {'type': 'loss', 'content': 0.1036318764090538, 'timestamp': '2025-09-10 02:46:47.777257', 'step': 9920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:47.830517', 'step': 9920, 'epoch': 2} {'type': 'loss', 'content': 0.16038280725479126, 'timestamp': '2025-09-10 02:46:47.832586', 'step': 9921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:47.886321', 'step': 9921, 'epoch': 2} {'type': 'loss', 'content': 0.1334027796983719, 'timestamp': '2025-09-10 02:46:47.888469', 'step': 9922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:47.942379', 'step': 9922, 'epoch': 2} {'type': 'loss', 'content': 0.18649150431156158, 'timestamp': '2025-09-10 02:46:47.944440', 'step': 9923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:47.997974', 'step': 9923, 'epoch': 2} {'type': 'loss', 'content': 0.11751735955476761, 'timestamp': '2025-09-10 02:46:48.003839', 'step': 9924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:48.056266', 'step': 9924, 'epoch': 2} {'type': 'loss', 'content': 0.19763632118701935, 'timestamp': '2025-09-10 02:46:48.058225', 'step': 9925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:48.112340', 'step': 9925, 'epoch': 2} {'type': 'loss', 'content': 0.11283379793167114, 'timestamp': '2025-09-10 02:46:48.114410', 'step': 9926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:48.167505', 'step': 9926, 'epoch': 2} {'type': 'loss', 'content': 0.0905291810631752, 'timestamp': '2025-09-10 02:46:48.169565', 'step': 9927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:48.222996', 'step': 9927, 'epoch': 2} {'type': 'loss', 'content': 0.16862788796424866, 'timestamp': '2025-09-10 02:46:48.228745', 'step': 9928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:48.280661', 'step': 9928, 'epoch': 2} {'type': 'loss', 'content': 0.0814124122262001, 'timestamp': '2025-09-10 02:46:48.282607', 'step': 9929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:48.334958', 'step': 9929, 'epoch': 2} {'type': 'loss', 'content': 0.27376851439476013, 'timestamp': '2025-09-10 02:46:48.336935', 'step': 9930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:48.390836', 'step': 9930, 'epoch': 2} {'type': 'loss', 'content': 0.08641792088747025, 'timestamp': '2025-09-10 02:46:48.393028', 'step': 9931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:48.446052', 'step': 9931, 'epoch': 2} {'type': 'loss', 'content': 0.11101416498422623, 'timestamp': '2025-09-10 02:46:48.451853', 'step': 9932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:48.504632', 'step': 9932, 'epoch': 2} {'type': 'loss', 'content': 0.10560901463031769, 'timestamp': '2025-09-10 02:46:48.506589', 'step': 9933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:48.559465', 'step': 9933, 'epoch': 2} {'type': 'loss', 'content': 0.1913059800863266, 'timestamp': '2025-09-10 02:46:48.561378', 'step': 9934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:48.616944', 'step': 9934, 'epoch': 2} {'type': 'loss', 'content': 0.07391940802335739, 'timestamp': '2025-09-10 02:46:48.619300', 'step': 9935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:48.673062', 'step': 9935, 'epoch': 2} {'type': 'loss', 'content': 0.16178059577941895, 'timestamp': '2025-09-10 02:46:48.679136', 'step': 9936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:48.732208', 'step': 9936, 'epoch': 2} {'type': 'loss', 'content': 0.18054001033306122, 'timestamp': '2025-09-10 02:46:48.734327', 'step': 9937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:48.789323', 'step': 9937, 'epoch': 2} {'type': 'loss', 'content': 0.08008868247270584, 'timestamp': '2025-09-10 02:46:48.791318', 'step': 9938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:48.846770', 'step': 9938, 'epoch': 2} {'type': 'loss', 'content': 0.11901569366455078, 'timestamp': '2025-09-10 02:46:48.848841', 'step': 9939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:48.905115', 'step': 9939, 'epoch': 2} {'type': 'loss', 'content': 0.07943134009838104, 'timestamp': '2025-09-10 02:46:48.911222', 'step': 9940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:48.964714', 'step': 9940, 'epoch': 2} {'type': 'loss', 'content': 0.19093723595142365, 'timestamp': '2025-09-10 02:46:48.966653', 'step': 9941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:49.021261', 'step': 9941, 'epoch': 2} {'type': 'loss', 'content': 0.10904572904109955, 'timestamp': '2025-09-10 02:46:49.023273', 'step': 9942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:49.080732', 'step': 9942, 'epoch': 2} {'type': 'loss', 'content': 0.1670403927564621, 'timestamp': '2025-09-10 02:46:49.082661', 'step': 9943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:49.136735', 'step': 9943, 'epoch': 2} {'type': 'loss', 'content': 0.17159345746040344, 'timestamp': '2025-09-10 02:46:49.142962', 'step': 9944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:49.196251', 'step': 9944, 'epoch': 2} {'type': 'loss', 'content': 0.18032626807689667, 'timestamp': '2025-09-10 02:46:49.198288', 'step': 9945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:49.253118', 'step': 9945, 'epoch': 2} {'type': 'loss', 'content': 0.18701380491256714, 'timestamp': '2025-09-10 02:46:49.255050', 'step': 9946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:49.308694', 'step': 9946, 'epoch': 2} {'type': 'loss', 'content': 0.08343017846345901, 'timestamp': '2025-09-10 02:46:49.310710', 'step': 9947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:49.364791', 'step': 9947, 'epoch': 2} {'type': 'loss', 'content': 0.1626073271036148, 'timestamp': '2025-09-10 02:46:49.370661', 'step': 9948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:49.424234', 'step': 9948, 'epoch': 2} {'type': 'loss', 'content': 0.15383939445018768, 'timestamp': '2025-09-10 02:46:49.426286', 'step': 9949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:49.480016', 'step': 9949, 'epoch': 2} {'type': 'loss', 'content': 0.11810053884983063, 'timestamp': '2025-09-10 02:46:49.482120', 'step': 9950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:49.537092', 'step': 9950, 'epoch': 2} {'type': 'loss', 'content': 0.13872388005256653, 'timestamp': '2025-09-10 02:46:49.539155', 'step': 9951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:49.592634', 'step': 9951, 'epoch': 2} {'type': 'loss', 'content': 0.10711266100406647, 'timestamp': '2025-09-10 02:46:49.598656', 'step': 9952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:49.650779', 'step': 9952, 'epoch': 2} {'type': 'loss', 'content': 0.07337125390768051, 'timestamp': '2025-09-10 02:46:49.652857', 'step': 9953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:49.705770', 'step': 9953, 'epoch': 2} {'type': 'loss', 'content': 0.23519809544086456, 'timestamp': '2025-09-10 02:46:49.707802', 'step': 9954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:49.761239', 'step': 9954, 'epoch': 2} {'type': 'loss', 'content': 0.1974327117204666, 'timestamp': '2025-09-10 02:46:49.763170', 'step': 9955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:49.815969', 'step': 9955, 'epoch': 2} {'type': 'loss', 'content': 0.09653463959693909, 'timestamp': '2025-09-10 02:46:49.821883', 'step': 9956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:49.875498', 'step': 9956, 'epoch': 2} {'type': 'loss', 'content': 0.1411845088005066, 'timestamp': '2025-09-10 02:46:49.877478', 'step': 9957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:49.930499', 'step': 9957, 'epoch': 2} {'type': 'loss', 'content': 0.1367911547422409, 'timestamp': '2025-09-10 02:46:49.932452', 'step': 9958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:49.985701', 'step': 9958, 'epoch': 2} {'type': 'loss', 'content': 0.09453116357326508, 'timestamp': '2025-09-10 02:46:49.987632', 'step': 9959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:50.040541', 'step': 9959, 'epoch': 2} {'type': 'loss', 'content': 0.15322040021419525, 'timestamp': '2025-09-10 02:46:50.046456', 'step': 9960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:50.100164', 'step': 9960, 'epoch': 2} {'type': 'loss', 'content': 0.14641617238521576, 'timestamp': '2025-09-10 02:46:50.102125', 'step': 9961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:50.155679', 'step': 9961, 'epoch': 2} {'type': 'loss', 'content': 0.16487963497638702, 'timestamp': '2025-09-10 02:46:50.157806', 'step': 9962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:50.211685', 'step': 9962, 'epoch': 2} {'type': 'loss', 'content': 0.0673566684126854, 'timestamp': '2025-09-10 02:46:50.213607', 'step': 9963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:50.267343', 'step': 9963, 'epoch': 2} {'type': 'loss', 'content': 0.10578026622533798, 'timestamp': '2025-09-10 02:46:50.273310', 'step': 9964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:50.325592', 'step': 9964, 'epoch': 2} {'type': 'loss', 'content': 0.19440564513206482, 'timestamp': '2025-09-10 02:46:50.327812', 'step': 9965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:50.380978', 'step': 9965, 'epoch': 2} {'type': 'loss', 'content': 0.0846397653222084, 'timestamp': '2025-09-10 02:46:50.383014', 'step': 9966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:50.436553', 'step': 9966, 'epoch': 2} {'type': 'loss', 'content': 0.07331513613462448, 'timestamp': '2025-09-10 02:46:50.438593', 'step': 9967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:50.493887', 'step': 9967, 'epoch': 2} {'type': 'loss', 'content': 0.23855431377887726, 'timestamp': '2025-09-10 02:46:50.500049', 'step': 9968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:50.553691', 'step': 9968, 'epoch': 2} {'type': 'loss', 'content': 0.06307659298181534, 'timestamp': '2025-09-10 02:46:50.555627', 'step': 9969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:50.609195', 'step': 9969, 'epoch': 2} {'type': 'loss', 'content': 0.11929121613502502, 'timestamp': '2025-09-10 02:46:50.611204', 'step': 9970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:50.664763', 'step': 9970, 'epoch': 2} {'type': 'loss', 'content': 0.1555706262588501, 'timestamp': '2025-09-10 02:46:50.666958', 'step': 9971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:50.719708', 'step': 9971, 'epoch': 2} {'type': 'loss', 'content': 0.11267487704753876, 'timestamp': '2025-09-10 02:46:50.725801', 'step': 9972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:50.778657', 'step': 9972, 'epoch': 2} {'type': 'loss', 'content': 0.24022309482097626, 'timestamp': '2025-09-10 02:46:50.780925', 'step': 9973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:50.833852', 'step': 9973, 'epoch': 2} {'type': 'loss', 'content': 0.1498287320137024, 'timestamp': '2025-09-10 02:46:50.835842', 'step': 9974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:50.889700', 'step': 9974, 'epoch': 2} {'type': 'loss', 'content': 0.14606653153896332, 'timestamp': '2025-09-10 02:46:50.891655', 'step': 9975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:50.944840', 'step': 9975, 'epoch': 2} {'type': 'loss', 'content': 0.15045717358589172, 'timestamp': '2025-09-10 02:46:50.950667', 'step': 9976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:51.005466', 'step': 9976, 'epoch': 2} {'type': 'loss', 'content': 0.20135052502155304, 'timestamp': '2025-09-10 02:46:51.007458', 'step': 9977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:51.061676', 'step': 9977, 'epoch': 2} {'type': 'loss', 'content': 0.08115179091691971, 'timestamp': '2025-09-10 02:46:51.063335', 'step': 9978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:51.117631', 'step': 9978, 'epoch': 2} {'type': 'loss', 'content': 0.14335547387599945, 'timestamp': '2025-09-10 02:46:51.119597', 'step': 9979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:51.175030', 'step': 9979, 'epoch': 2} {'type': 'loss', 'content': 0.11426851898431778, 'timestamp': '2025-09-10 02:46:51.180847', 'step': 9980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:51.234010', 'step': 9980, 'epoch': 2} {'type': 'loss', 'content': 0.23190274834632874, 'timestamp': '2025-09-10 02:46:51.235728', 'step': 9981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:51.289154', 'step': 9981, 'epoch': 2} {'type': 'loss', 'content': 0.0684853047132492, 'timestamp': '2025-09-10 02:46:51.294204', 'step': 9982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:51.356630', 'step': 9982, 'epoch': 2} {'type': 'loss', 'content': 0.09502138942480087, 'timestamp': '2025-09-10 02:46:51.358601', 'step': 9983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:51.411705', 'step': 9983, 'epoch': 2} {'type': 'loss', 'content': 0.11194826662540436, 'timestamp': '2025-09-10 02:46:51.417535', 'step': 9984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:51.469985', 'step': 9984, 'epoch': 2} {'type': 'loss', 'content': 0.1569838523864746, 'timestamp': '2025-09-10 02:46:51.471969', 'step': 9985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:51.525600', 'step': 9985, 'epoch': 2} {'type': 'loss', 'content': 0.08703606575727463, 'timestamp': '2025-09-10 02:46:51.527576', 'step': 9986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:51.580543', 'step': 9986, 'epoch': 2} {'type': 'loss', 'content': 0.15948781371116638, 'timestamp': '2025-09-10 02:46:51.582477', 'step': 9987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:51.636239', 'step': 9987, 'epoch': 2} {'type': 'loss', 'content': 0.17069238424301147, 'timestamp': '2025-09-10 02:46:51.642067', 'step': 9988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:51.695577', 'step': 9988, 'epoch': 2} {'type': 'loss', 'content': 0.16425879299640656, 'timestamp': '2025-09-10 02:46:51.697776', 'step': 9989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:51.751891', 'step': 9989, 'epoch': 2} {'type': 'loss', 'content': 0.16558542847633362, 'timestamp': '2025-09-10 02:46:51.755114', 'step': 9990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:51.810106', 'step': 9990, 'epoch': 2} {'type': 'loss', 'content': 0.15920798480510712, 'timestamp': '2025-09-10 02:46:51.812095', 'step': 9991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:51.864995', 'step': 9991, 'epoch': 2} {'type': 'loss', 'content': 0.15760783851146698, 'timestamp': '2025-09-10 02:46:51.870802', 'step': 9992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:51.925174', 'step': 9992, 'epoch': 2} {'type': 'loss', 'content': 0.11787204444408417, 'timestamp': '2025-09-10 02:46:51.928777', 'step': 9993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:51.985107', 'step': 9993, 'epoch': 2} {'type': 'loss', 'content': 0.10980864614248276, 'timestamp': '2025-09-10 02:46:51.987172', 'step': 9994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:52.041231', 'step': 9994, 'epoch': 2} {'type': 'loss', 'content': 0.13694749772548676, 'timestamp': '2025-09-10 02:46:52.048657', 'step': 9995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:52.108642', 'step': 9995, 'epoch': 2} {'type': 'loss', 'content': 0.173044815659523, 'timestamp': '2025-09-10 02:46:52.117287', 'step': 9996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:52.179959', 'step': 9996, 'epoch': 2} {'type': 'loss', 'content': 0.05840659886598587, 'timestamp': '2025-09-10 02:46:52.182026', 'step': 9997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:52.244140', 'step': 9997, 'epoch': 2} {'type': 'loss', 'content': 0.07073809206485748, 'timestamp': '2025-09-10 02:46:52.246919', 'step': 9998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:52.310630', 'step': 9998, 'epoch': 2} {'type': 'loss', 'content': 0.14677341282367706, 'timestamp': '2025-09-10 02:46:52.315429', 'step': 9999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:52.371831', 'step': 9999, 'epoch': 2} {'type': 'loss', 'content': 0.10301658511161804, 'timestamp': '2025-09-10 02:46:52.377769', 'step': 10000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 10000', 'timestamp': '2025-09-10 02:46:52.847631', 'step': 10000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:52.903967', 'step': 10000, 'epoch': 2} {'type': 'loss', 'content': 0.16319620609283447, 'timestamp': '2025-09-10 02:46:52.905900', 'step': 10001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:52.963185', 'step': 10001, 'epoch': 2} {'type': 'loss', 'content': 0.08668671548366547, 'timestamp': '2025-09-10 02:46:52.965153', 'step': 10002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:53.025903', 'step': 10002, 'epoch': 2} {'type': 'loss', 'content': 0.09065354615449905, 'timestamp': '2025-09-10 02:46:53.027879', 'step': 10003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:53.086246', 'step': 10003, 'epoch': 2} {'type': 'loss', 'content': 0.15119490027427673, 'timestamp': '2025-09-10 02:46:53.092359', 'step': 10004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:53.147445', 'step': 10004, 'epoch': 2} {'type': 'loss', 'content': 0.1323326826095581, 'timestamp': '2025-09-10 02:46:53.149570', 'step': 10005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:53.211664', 'step': 10005, 'epoch': 2} {'type': 'loss', 'content': 0.12883609533309937, 'timestamp': '2025-09-10 02:46:53.213808', 'step': 10006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:53.274425', 'step': 10006, 'epoch': 2} {'type': 'loss', 'content': 0.15724338591098785, 'timestamp': '2025-09-10 02:46:53.276622', 'step': 10007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:53.342540', 'step': 10007, 'epoch': 2} {'type': 'loss', 'content': 0.24081364274024963, 'timestamp': '2025-09-10 02:46:53.348764', 'step': 10008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:53.402196', 'step': 10008, 'epoch': 2} {'type': 'loss', 'content': 0.14625731110572815, 'timestamp': '2025-09-10 02:46:53.404128', 'step': 10009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:53.459516', 'step': 10009, 'epoch': 2} {'type': 'loss', 'content': 0.08667091280221939, 'timestamp': '2025-09-10 02:46:53.461550', 'step': 10010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:53.514719', 'step': 10010, 'epoch': 2} {'type': 'loss', 'content': 0.16272591054439545, 'timestamp': '2025-09-10 02:46:53.516726', 'step': 10011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:53.570562', 'step': 10011, 'epoch': 2} {'type': 'loss', 'content': 0.06365843117237091, 'timestamp': '2025-09-10 02:46:53.576593', 'step': 10012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:53.630747', 'step': 10012, 'epoch': 2} {'type': 'loss', 'content': 0.17504772543907166, 'timestamp': '2025-09-10 02:46:53.638937', 'step': 10013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:53.698373', 'step': 10013, 'epoch': 2} {'type': 'loss', 'content': 0.14236055314540863, 'timestamp': '2025-09-10 02:46:53.701345', 'step': 10014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:53.766803', 'step': 10014, 'epoch': 2} {'type': 'loss', 'content': 0.14392897486686707, 'timestamp': '2025-09-10 02:46:53.768957', 'step': 10015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:53.823027', 'step': 10015, 'epoch': 2} {'type': 'loss', 'content': 0.11046373844146729, 'timestamp': '2025-09-10 02:46:53.828971', 'step': 10016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:53.882335', 'step': 10016, 'epoch': 2} {'type': 'loss', 'content': 0.10313098877668381, 'timestamp': '2025-09-10 02:46:53.884629', 'step': 10017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:53.941356', 'step': 10017, 'epoch': 2} {'type': 'loss', 'content': 0.11290355026721954, 'timestamp': '2025-09-10 02:46:53.945860', 'step': 10018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:54.004522', 'step': 10018, 'epoch': 2} {'type': 'loss', 'content': 0.07482331991195679, 'timestamp': '2025-09-10 02:46:54.006721', 'step': 10019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:54.069833', 'step': 10019, 'epoch': 2} {'type': 'loss', 'content': 0.1746615469455719, 'timestamp': '2025-09-10 02:46:54.076056', 'step': 10020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:54.130705', 'step': 10020, 'epoch': 2} {'type': 'loss', 'content': 0.09054086357355118, 'timestamp': '2025-09-10 02:46:54.132876', 'step': 10021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:54.191854', 'step': 10021, 'epoch': 2} {'type': 'loss', 'content': 0.03925543278455734, 'timestamp': '2025-09-10 02:46:54.193959', 'step': 10022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:54.251725', 'step': 10022, 'epoch': 2} {'type': 'loss', 'content': 0.08672048896551132, 'timestamp': '2025-09-10 02:46:54.254062', 'step': 10023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:54.324504', 'step': 10023, 'epoch': 2} {'type': 'loss', 'content': 0.03722364827990532, 'timestamp': '2025-09-10 02:46:54.330464', 'step': 10024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:54.391159', 'step': 10024, 'epoch': 2} {'type': 'loss', 'content': 0.13015224039554596, 'timestamp': '2025-09-10 02:46:54.393286', 'step': 10025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:54.447923', 'step': 10025, 'epoch': 2} {'type': 'loss', 'content': 0.09561511874198914, 'timestamp': '2025-09-10 02:46:54.449901', 'step': 10026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:54.504303', 'step': 10026, 'epoch': 2} {'type': 'loss', 'content': 0.11351379007101059, 'timestamp': '2025-09-10 02:46:54.506348', 'step': 10027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:54.561952', 'step': 10027, 'epoch': 2} {'type': 'loss', 'content': 0.13133369386196136, 'timestamp': '2025-09-10 02:46:54.568098', 'step': 10028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:54.645538', 'step': 10028, 'epoch': 2} {'type': 'loss', 'content': 0.07550600171089172, 'timestamp': '2025-09-10 02:46:54.647547', 'step': 10029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:54.705605', 'step': 10029, 'epoch': 2} {'type': 'loss', 'content': 0.1058395579457283, 'timestamp': '2025-09-10 02:46:54.707745', 'step': 10030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:54.766493', 'step': 10030, 'epoch': 2} {'type': 'loss', 'content': 0.19381868839263916, 'timestamp': '2025-09-10 02:46:54.768469', 'step': 10031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:54.823304', 'step': 10031, 'epoch': 2} {'type': 'loss', 'content': 0.09959343820810318, 'timestamp': '2025-09-10 02:46:54.829206', 'step': 10032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:54.887516', 'step': 10032, 'epoch': 2} {'type': 'loss', 'content': 0.0840563178062439, 'timestamp': '2025-09-10 02:46:54.889613', 'step': 10033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:54.945897', 'step': 10033, 'epoch': 2} {'type': 'loss', 'content': 0.09940440207719803, 'timestamp': '2025-09-10 02:46:54.948144', 'step': 10034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:55.012298', 'step': 10034, 'epoch': 2} {'type': 'loss', 'content': 0.10141148418188095, 'timestamp': '2025-09-10 02:46:55.014538', 'step': 10035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:55.070889', 'step': 10035, 'epoch': 2} {'type': 'loss', 'content': 0.07320326566696167, 'timestamp': '2025-09-10 02:46:55.076934', 'step': 10036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:55.137517', 'step': 10036, 'epoch': 2} {'type': 'loss', 'content': 0.09706441313028336, 'timestamp': '2025-09-10 02:46:55.139639', 'step': 10037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:55.192414', 'step': 10037, 'epoch': 2} {'type': 'loss', 'content': 0.17369604110717773, 'timestamp': '2025-09-10 02:46:55.194384', 'step': 10038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:55.250438', 'step': 10038, 'epoch': 2} {'type': 'loss', 'content': 0.15386055409908295, 'timestamp': '2025-09-10 02:46:55.252453', 'step': 10039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:55.306437', 'step': 10039, 'epoch': 2} {'type': 'loss', 'content': 0.06746789067983627, 'timestamp': '2025-09-10 02:46:55.312200', 'step': 10040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:55.367488', 'step': 10040, 'epoch': 2} {'type': 'loss', 'content': 0.1250949501991272, 'timestamp': '2025-09-10 02:46:55.369285', 'step': 10041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:55.421641', 'step': 10041, 'epoch': 2} {'type': 'loss', 'content': 0.17210131883621216, 'timestamp': '2025-09-10 02:46:55.423748', 'step': 10042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:55.477078', 'step': 10042, 'epoch': 2} {'type': 'loss', 'content': 0.14257583022117615, 'timestamp': '2025-09-10 02:46:55.479448', 'step': 10043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:55.571630', 'step': 10043, 'epoch': 2} {'type': 'loss', 'content': 0.14501026272773743, 'timestamp': '2025-09-10 02:46:55.577511', 'step': 10044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:55.639200', 'step': 10044, 'epoch': 2} {'type': 'loss', 'content': 0.1318744719028473, 'timestamp': '2025-09-10 02:46:55.640944', 'step': 10045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:55.695686', 'step': 10045, 'epoch': 2} {'type': 'loss', 'content': 0.14445413649082184, 'timestamp': '2025-09-10 02:46:55.697726', 'step': 10046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:55.755609', 'step': 10046, 'epoch': 2} {'type': 'loss', 'content': 0.14388775825500488, 'timestamp': '2025-09-10 02:46:55.757656', 'step': 10047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:55.812223', 'step': 10047, 'epoch': 2} {'type': 'loss', 'content': 0.14699788391590118, 'timestamp': '2025-09-10 02:46:55.818170', 'step': 10048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:55.880228', 'step': 10048, 'epoch': 2} {'type': 'loss', 'content': 0.11062946170568466, 'timestamp': '2025-09-10 02:46:55.882228', 'step': 10049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:55.936529', 'step': 10049, 'epoch': 2} {'type': 'loss', 'content': 0.09445694088935852, 'timestamp': '2025-09-10 02:46:55.938632', 'step': 10050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:56.003910', 'step': 10050, 'epoch': 2} {'type': 'loss', 'content': 0.1402677595615387, 'timestamp': '2025-09-10 02:46:56.006091', 'step': 10051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:56.060320', 'step': 10051, 'epoch': 2} {'type': 'loss', 'content': 0.0875324159860611, 'timestamp': '2025-09-10 02:46:56.066253', 'step': 10052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:56.120885', 'step': 10052, 'epoch': 2} {'type': 'loss', 'content': 0.1439216136932373, 'timestamp': '2025-09-10 02:46:56.122821', 'step': 10053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:56.179315', 'step': 10053, 'epoch': 2} {'type': 'loss', 'content': 0.12669464945793152, 'timestamp': '2025-09-10 02:46:56.181299', 'step': 10054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:56.234676', 'step': 10054, 'epoch': 2} {'type': 'loss', 'content': 0.04775110259652138, 'timestamp': '2025-09-10 02:46:56.236824', 'step': 10055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:56.298333', 'step': 10055, 'epoch': 2} {'type': 'loss', 'content': 0.21431443095207214, 'timestamp': '2025-09-10 02:46:56.304233', 'step': 10056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:56.356850', 'step': 10056, 'epoch': 2} {'type': 'loss', 'content': 0.0439949668943882, 'timestamp': '2025-09-10 02:46:56.358828', 'step': 10057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:56.414440', 'step': 10057, 'epoch': 2} {'type': 'loss', 'content': 0.07625910639762878, 'timestamp': '2025-09-10 02:46:56.416560', 'step': 10058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:56.483893', 'step': 10058, 'epoch': 2} {'type': 'loss', 'content': 0.12301141768693924, 'timestamp': '2025-09-10 02:46:56.487560', 'step': 10059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:56.555658', 'step': 10059, 'epoch': 2} {'type': 'loss', 'content': 0.1398182511329651, 'timestamp': '2025-09-10 02:46:56.561788', 'step': 10060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:56.635679', 'step': 10060, 'epoch': 2} {'type': 'loss', 'content': 0.09685754030942917, 'timestamp': '2025-09-10 02:46:56.637651', 'step': 10061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:56.700334', 'step': 10061, 'epoch': 2} {'type': 'loss', 'content': 0.09443827718496323, 'timestamp': '2025-09-10 02:46:56.702314', 'step': 10062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:56.756682', 'step': 10062, 'epoch': 2} {'type': 'loss', 'content': 0.110869862139225, 'timestamp': '2025-09-10 02:46:56.758678', 'step': 10063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:56.812581', 'step': 10063, 'epoch': 2} {'type': 'loss', 'content': 0.08401267230510712, 'timestamp': '2025-09-10 02:46:56.818532', 'step': 10064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:56.871517', 'step': 10064, 'epoch': 2} {'type': 'loss', 'content': 0.1028137281537056, 'timestamp': '2025-09-10 02:46:56.873624', 'step': 10065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:56.926614', 'step': 10065, 'epoch': 2} {'type': 'loss', 'content': 0.1790113002061844, 'timestamp': '2025-09-10 02:46:56.928726', 'step': 10066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:56.982470', 'step': 10066, 'epoch': 2} {'type': 'loss', 'content': 0.09258787333965302, 'timestamp': '2025-09-10 02:46:56.984519', 'step': 10067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:57.038277', 'step': 10067, 'epoch': 2} {'type': 'loss', 'content': 0.04866156354546547, 'timestamp': '2025-09-10 02:46:57.044065', 'step': 10068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:57.096984', 'step': 10068, 'epoch': 2} {'type': 'loss', 'content': 0.19333089888095856, 'timestamp': '2025-09-10 02:46:57.099347', 'step': 10069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:57.152461', 'step': 10069, 'epoch': 2} {'type': 'loss', 'content': 0.14927998185157776, 'timestamp': '2025-09-10 02:46:57.154477', 'step': 10070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:57.207498', 'step': 10070, 'epoch': 2} {'type': 'loss', 'content': 0.08981331437826157, 'timestamp': '2025-09-10 02:46:57.209556', 'step': 10071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:57.262340', 'step': 10071, 'epoch': 2} {'type': 'loss', 'content': 0.10686648637056351, 'timestamp': '2025-09-10 02:46:57.268133', 'step': 10072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:57.320444', 'step': 10072, 'epoch': 2} {'type': 'loss', 'content': 0.12147906422615051, 'timestamp': '2025-09-10 02:46:57.322541', 'step': 10073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:57.375757', 'step': 10073, 'epoch': 2} {'type': 'loss', 'content': 0.09880771487951279, 'timestamp': '2025-09-10 02:46:57.377790', 'step': 10074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:57.447310', 'step': 10074, 'epoch': 2} {'type': 'loss', 'content': 0.11291204392910004, 'timestamp': '2025-09-10 02:46:57.449567', 'step': 10075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:57.523880', 'step': 10075, 'epoch': 2} {'type': 'loss', 'content': 0.15356217324733734, 'timestamp': '2025-09-10 02:46:57.530019', 'step': 10076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:57.586288', 'step': 10076, 'epoch': 2} {'type': 'loss', 'content': 0.2502768039703369, 'timestamp': '2025-09-10 02:46:57.589471', 'step': 10077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:57.662655', 'step': 10077, 'epoch': 2} {'type': 'loss', 'content': 0.08921351283788681, 'timestamp': '2025-09-10 02:46:57.665144', 'step': 10078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:57.721853', 'step': 10078, 'epoch': 2} {'type': 'loss', 'content': 0.17128464579582214, 'timestamp': '2025-09-10 02:46:57.723969', 'step': 10079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:57.779333', 'step': 10079, 'epoch': 2} {'type': 'loss', 'content': 0.13471619784832, 'timestamp': '2025-09-10 02:46:57.785376', 'step': 10080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:57.843780', 'step': 10080, 'epoch': 2} {'type': 'loss', 'content': 0.08242083340883255, 'timestamp': '2025-09-10 02:46:57.845722', 'step': 10081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:57.901646', 'step': 10081, 'epoch': 2} {'type': 'loss', 'content': 0.06569948047399521, 'timestamp': '2025-09-10 02:46:57.903598', 'step': 10082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:57.956529', 'step': 10082, 'epoch': 2} {'type': 'loss', 'content': 0.13319985568523407, 'timestamp': '2025-09-10 02:46:57.958478', 'step': 10083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:58.011489', 'step': 10083, 'epoch': 2} {'type': 'loss', 'content': 0.1788400113582611, 'timestamp': '2025-09-10 02:46:58.017162', 'step': 10084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:58.069638', 'step': 10084, 'epoch': 2} {'type': 'loss', 'content': 0.10083355009555817, 'timestamp': '2025-09-10 02:46:58.071670', 'step': 10085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:58.124695', 'step': 10085, 'epoch': 2} {'type': 'loss', 'content': 0.093199223279953, 'timestamp': '2025-09-10 02:46:58.126891', 'step': 10086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:58.180826', 'step': 10086, 'epoch': 2} {'type': 'loss', 'content': 0.12334544211626053, 'timestamp': '2025-09-10 02:46:58.182939', 'step': 10087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:58.237830', 'step': 10087, 'epoch': 2} {'type': 'loss', 'content': 0.08177904784679413, 'timestamp': '2025-09-10 02:46:58.243489', 'step': 10088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:46:58.296821', 'step': 10088, 'epoch': 2} {'type': 'loss', 'content': 0.12474870681762695, 'timestamp': '2025-09-10 02:46:58.298794', 'step': 10089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:58.358852', 'step': 10089, 'epoch': 2} {'type': 'loss', 'content': 0.05428893864154816, 'timestamp': '2025-09-10 02:46:58.362181', 'step': 10090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:58.419485', 'step': 10090, 'epoch': 2} {'type': 'loss', 'content': 0.17455635964870453, 'timestamp': '2025-09-10 02:46:58.421698', 'step': 10091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:58.478588', 'step': 10091, 'epoch': 2} {'type': 'loss', 'content': 0.09325671941041946, 'timestamp': '2025-09-10 02:46:58.484792', 'step': 10092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:58.546988', 'step': 10092, 'epoch': 2} {'type': 'loss', 'content': 0.1696237027645111, 'timestamp': '2025-09-10 02:46:58.549131', 'step': 10093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:58.604401', 'step': 10093, 'epoch': 2} {'type': 'loss', 'content': 0.1362457424402237, 'timestamp': '2025-09-10 02:46:58.606590', 'step': 10094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:58.660511', 'step': 10094, 'epoch': 2} {'type': 'loss', 'content': 0.10675497353076935, 'timestamp': '2025-09-10 02:46:58.662584', 'step': 10095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:58.716561', 'step': 10095, 'epoch': 2} {'type': 'loss', 'content': 0.14035163819789886, 'timestamp': '2025-09-10 02:46:58.722451', 'step': 10096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:58.780318', 'step': 10096, 'epoch': 2} {'type': 'loss', 'content': 0.08693039417266846, 'timestamp': '2025-09-10 02:46:58.782317', 'step': 10097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:58.850059', 'step': 10097, 'epoch': 2} {'type': 'loss', 'content': 0.10010901093482971, 'timestamp': '2025-09-10 02:46:58.852093', 'step': 10098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:58.912952', 'step': 10098, 'epoch': 2} {'type': 'loss', 'content': 0.10413500666618347, 'timestamp': '2025-09-10 02:46:58.915140', 'step': 10099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:58.973363', 'step': 10099, 'epoch': 2} {'type': 'loss', 'content': 0.11982820928096771, 'timestamp': '2025-09-10 02:46:58.979449', 'step': 10100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:59.035500', 'step': 10100, 'epoch': 2} {'type': 'loss', 'content': 0.14496111869812012, 'timestamp': '2025-09-10 02:46:59.037474', 'step': 10101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:59.095364', 'step': 10101, 'epoch': 2} {'type': 'loss', 'content': 0.12061412632465363, 'timestamp': '2025-09-10 02:46:59.097640', 'step': 10102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:59.157218', 'step': 10102, 'epoch': 2} {'type': 'loss', 'content': 0.10156304389238358, 'timestamp': '2025-09-10 02:46:59.159480', 'step': 10103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:59.215915', 'step': 10103, 'epoch': 2} {'type': 'loss', 'content': 0.06379014998674393, 'timestamp': '2025-09-10 02:46:59.222458', 'step': 10104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:59.286319', 'step': 10104, 'epoch': 2} {'type': 'loss', 'content': 0.10497331619262695, 'timestamp': '2025-09-10 02:46:59.288599', 'step': 10105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:59.345475', 'step': 10105, 'epoch': 2} {'type': 'loss', 'content': 0.09225975722074509, 'timestamp': '2025-09-10 02:46:59.347695', 'step': 10106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:59.406364', 'step': 10106, 'epoch': 2} {'type': 'loss', 'content': 0.1121254414319992, 'timestamp': '2025-09-10 02:46:59.408841', 'step': 10107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:59.462563', 'step': 10107, 'epoch': 2} {'type': 'loss', 'content': 0.11166535317897797, 'timestamp': '2025-09-10 02:46:59.469001', 'step': 10108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:59.523063', 'step': 10108, 'epoch': 2} {'type': 'loss', 'content': 0.08496449887752533, 'timestamp': '2025-09-10 02:46:59.525438', 'step': 10109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:59.579803', 'step': 10109, 'epoch': 2} {'type': 'loss', 'content': 0.1564747840166092, 'timestamp': '2025-09-10 02:46:59.582073', 'step': 10110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:59.636383', 'step': 10110, 'epoch': 2} {'type': 'loss', 'content': 0.15875692665576935, 'timestamp': '2025-09-10 02:46:59.638568', 'step': 10111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:59.692252', 'step': 10111, 'epoch': 2} {'type': 'loss', 'content': 0.11620118468999863, 'timestamp': '2025-09-10 02:46:59.698381', 'step': 10112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:46:59.752635', 'step': 10112, 'epoch': 2} {'type': 'loss', 'content': 0.1159677505493164, 'timestamp': '2025-09-10 02:46:59.754845', 'step': 10113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:59.809943', 'step': 10113, 'epoch': 2} {'type': 'loss', 'content': 0.04387165233492851, 'timestamp': '2025-09-10 02:46:59.812057', 'step': 10114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:59.867059', 'step': 10114, 'epoch': 2} {'type': 'loss', 'content': 0.12125052511692047, 'timestamp': '2025-09-10 02:46:59.869303', 'step': 10115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:46:59.925443', 'step': 10115, 'epoch': 2} {'type': 'loss', 'content': 0.08849922567605972, 'timestamp': '2025-09-10 02:46:59.931683', 'step': 10116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:59.986507', 'step': 10116, 'epoch': 2} {'type': 'loss', 'content': 0.07848040014505386, 'timestamp': '2025-09-10 02:46:59.988753', 'step': 10117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:00.044789', 'step': 10117, 'epoch': 2} {'type': 'loss', 'content': 0.07122626900672913, 'timestamp': '2025-09-10 02:47:00.047022', 'step': 10118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:00.101915', 'step': 10118, 'epoch': 2} {'type': 'loss', 'content': 0.08416341245174408, 'timestamp': '2025-09-10 02:47:00.104016', 'step': 10119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:00.157900', 'step': 10119, 'epoch': 2} {'type': 'loss', 'content': 0.21573160588741302, 'timestamp': '2025-09-10 02:47:00.164204', 'step': 10120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:00.218206', 'step': 10120, 'epoch': 2} {'type': 'loss', 'content': 0.22689573466777802, 'timestamp': '2025-09-10 02:47:00.220331', 'step': 10121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:00.274798', 'step': 10121, 'epoch': 2} {'type': 'loss', 'content': 0.12759929895401, 'timestamp': '2025-09-10 02:47:00.277229', 'step': 10122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:00.331556', 'step': 10122, 'epoch': 2} {'type': 'loss', 'content': 0.1112109050154686, 'timestamp': '2025-09-10 02:47:00.333630', 'step': 10123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:00.389614', 'step': 10123, 'epoch': 2} {'type': 'loss', 'content': 0.10163309425115585, 'timestamp': '2025-09-10 02:47:00.397226', 'step': 10124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:00.455626', 'step': 10124, 'epoch': 2} {'type': 'loss', 'content': 0.1617364138364792, 'timestamp': '2025-09-10 02:47:00.457585', 'step': 10125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:00.511914', 'step': 10125, 'epoch': 2} {'type': 'loss', 'content': 0.08230933547019958, 'timestamp': '2025-09-10 02:47:00.514110', 'step': 10126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:00.570905', 'step': 10126, 'epoch': 2} {'type': 'loss', 'content': 0.1773303747177124, 'timestamp': '2025-09-10 02:47:00.572886', 'step': 10127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:00.627226', 'step': 10127, 'epoch': 2} {'type': 'loss', 'content': 0.15493015944957733, 'timestamp': '2025-09-10 02:47:00.633235', 'step': 10128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:00.687031', 'step': 10128, 'epoch': 2} {'type': 'loss', 'content': 0.13695672154426575, 'timestamp': '2025-09-10 02:47:00.689245', 'step': 10129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:00.743235', 'step': 10129, 'epoch': 2} {'type': 'loss', 'content': 0.1328750103712082, 'timestamp': '2025-09-10 02:47:00.745215', 'step': 10130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:00.799774', 'step': 10130, 'epoch': 2} {'type': 'loss', 'content': 0.13381870090961456, 'timestamp': '2025-09-10 02:47:00.801924', 'step': 10131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:00.856470', 'step': 10131, 'epoch': 2} {'type': 'loss', 'content': 0.07466457039117813, 'timestamp': '2025-09-10 02:47:00.862676', 'step': 10132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:00.916477', 'step': 10132, 'epoch': 2} {'type': 'loss', 'content': 0.09622865170240402, 'timestamp': '2025-09-10 02:47:00.918625', 'step': 10133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:00.972102', 'step': 10133, 'epoch': 2} {'type': 'loss', 'content': 0.08781398087739944, 'timestamp': '2025-09-10 02:47:00.974014', 'step': 10134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:01.027841', 'step': 10134, 'epoch': 2} {'type': 'loss', 'content': 0.16113486886024475, 'timestamp': '2025-09-10 02:47:01.029818', 'step': 10135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:01.083239', 'step': 10135, 'epoch': 2} {'type': 'loss', 'content': 0.08115638047456741, 'timestamp': '2025-09-10 02:47:01.089557', 'step': 10136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:01.142356', 'step': 10136, 'epoch': 2} {'type': 'loss', 'content': 0.11621961742639542, 'timestamp': '2025-09-10 02:47:01.144727', 'step': 10137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:01.198256', 'step': 10137, 'epoch': 2} {'type': 'loss', 'content': 0.08851377665996552, 'timestamp': '2025-09-10 02:47:01.200389', 'step': 10138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:01.254727', 'step': 10138, 'epoch': 2} {'type': 'loss', 'content': 0.08007878810167313, 'timestamp': '2025-09-10 02:47:01.256742', 'step': 10139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:01.310397', 'step': 10139, 'epoch': 2} {'type': 'loss', 'content': 0.1464080959558487, 'timestamp': '2025-09-10 02:47:01.316268', 'step': 10140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:01.368877', 'step': 10140, 'epoch': 2} {'type': 'loss', 'content': 0.06412126123905182, 'timestamp': '2025-09-10 02:47:01.371000', 'step': 10141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:01.425774', 'step': 10141, 'epoch': 2} {'type': 'loss', 'content': 0.12315896898508072, 'timestamp': '2025-09-10 02:47:01.427747', 'step': 10142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:01.482220', 'step': 10142, 'epoch': 2} {'type': 'loss', 'content': 0.16420099139213562, 'timestamp': '2025-09-10 02:47:01.484197', 'step': 10143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:01.540153', 'step': 10143, 'epoch': 2} {'type': 'loss', 'content': 0.12068966776132584, 'timestamp': '2025-09-10 02:47:01.546231', 'step': 10144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:01.599956', 'step': 10144, 'epoch': 2} {'type': 'loss', 'content': 0.06913328915834427, 'timestamp': '2025-09-10 02:47:01.601899', 'step': 10145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:01.658399', 'step': 10145, 'epoch': 2} {'type': 'loss', 'content': 0.10016514360904694, 'timestamp': '2025-09-10 02:47:01.660575', 'step': 10146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:01.715074', 'step': 10146, 'epoch': 2} {'type': 'loss', 'content': 0.10516547411680222, 'timestamp': '2025-09-10 02:47:01.717051', 'step': 10147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:01.771663', 'step': 10147, 'epoch': 2} {'type': 'loss', 'content': 0.1034822091460228, 'timestamp': '2025-09-10 02:47:01.777842', 'step': 10148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:01.832400', 'step': 10148, 'epoch': 2} {'type': 'loss', 'content': 0.08918356150388718, 'timestamp': '2025-09-10 02:47:01.834647', 'step': 10149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:01.888906', 'step': 10149, 'epoch': 2} {'type': 'loss', 'content': 0.14917366206645966, 'timestamp': '2025-09-10 02:47:01.890902', 'step': 10150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:01.947363', 'step': 10150, 'epoch': 2} {'type': 'loss', 'content': 0.1821892261505127, 'timestamp': '2025-09-10 02:47:01.949503', 'step': 10151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:02.004128', 'step': 10151, 'epoch': 2} {'type': 'loss', 'content': 0.1869743913412094, 'timestamp': '2025-09-10 02:47:02.010524', 'step': 10152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:02.066939', 'step': 10152, 'epoch': 2} {'type': 'loss', 'content': 0.075621098279953, 'timestamp': '2025-09-10 02:47:02.069192', 'step': 10153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:02.124001', 'step': 10153, 'epoch': 2} {'type': 'loss', 'content': 0.11019954085350037, 'timestamp': '2025-09-10 02:47:02.126026', 'step': 10154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:02.180653', 'step': 10154, 'epoch': 2} {'type': 'loss', 'content': 0.13987913727760315, 'timestamp': '2025-09-10 02:47:02.182665', 'step': 10155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:02.237763', 'step': 10155, 'epoch': 2} {'type': 'loss', 'content': 0.10466772317886353, 'timestamp': '2025-09-10 02:47:02.243756', 'step': 10156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:02.297366', 'step': 10156, 'epoch': 2} {'type': 'loss', 'content': 0.10460960865020752, 'timestamp': '2025-09-10 02:47:02.299366', 'step': 10157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:02.353515', 'step': 10157, 'epoch': 2} {'type': 'loss', 'content': 0.1447821855545044, 'timestamp': '2025-09-10 02:47:02.355601', 'step': 10158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:02.410215', 'step': 10158, 'epoch': 2} {'type': 'loss', 'content': 0.14354005455970764, 'timestamp': '2025-09-10 02:47:02.412209', 'step': 10159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:02.466346', 'step': 10159, 'epoch': 2} {'type': 'loss', 'content': 0.11137590557336807, 'timestamp': '2025-09-10 02:47:02.472556', 'step': 10160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:02.527130', 'step': 10160, 'epoch': 2} {'type': 'loss', 'content': 0.07191252708435059, 'timestamp': '2025-09-10 02:47:02.529111', 'step': 10161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:02.592702', 'step': 10161, 'epoch': 2} {'type': 'loss', 'content': 0.13320592045783997, 'timestamp': '2025-09-10 02:47:02.594622', 'step': 10162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:02.651247', 'step': 10162, 'epoch': 2} {'type': 'loss', 'content': 0.24976611137390137, 'timestamp': '2025-09-10 02:47:02.653167', 'step': 10163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:02.707265', 'step': 10163, 'epoch': 2} {'type': 'loss', 'content': 0.1332917958498001, 'timestamp': '2025-09-10 02:47:02.717681', 'step': 10164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:02.774197', 'step': 10164, 'epoch': 2} {'type': 'loss', 'content': 0.18031924962997437, 'timestamp': '2025-09-10 02:47:02.778556', 'step': 10165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:02.835559', 'step': 10165, 'epoch': 2} {'type': 'loss', 'content': 0.13859373331069946, 'timestamp': '2025-09-10 02:47:02.837913', 'step': 10166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:02.892324', 'step': 10166, 'epoch': 2} {'type': 'loss', 'content': 0.1975122094154358, 'timestamp': '2025-09-10 02:47:02.899732', 'step': 10167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:47:02.959301', 'step': 10167, 'epoch': 2} {'type': 'loss', 'content': 0.14118129014968872, 'timestamp': '2025-09-10 02:47:02.967548', 'step': 10168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:03.025424', 'step': 10168, 'epoch': 2} {'type': 'loss', 'content': 0.1483781337738037, 'timestamp': '2025-09-10 02:47:03.027618', 'step': 10169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:03.081950', 'step': 10169, 'epoch': 2} {'type': 'loss', 'content': 0.22361019253730774, 'timestamp': '2025-09-10 02:47:03.084092', 'step': 10170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:03.140303', 'step': 10170, 'epoch': 2} {'type': 'loss', 'content': 0.156581848859787, 'timestamp': '2025-09-10 02:47:03.142427', 'step': 10171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:03.197345', 'step': 10171, 'epoch': 2} {'type': 'loss', 'content': 0.15591788291931152, 'timestamp': '2025-09-10 02:47:03.203649', 'step': 10172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:03.275311', 'step': 10172, 'epoch': 2} {'type': 'loss', 'content': 0.17556723952293396, 'timestamp': '2025-09-10 02:47:03.277328', 'step': 10173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:03.334595', 'step': 10173, 'epoch': 2} {'type': 'loss', 'content': 0.08995859324932098, 'timestamp': '2025-09-10 02:47:03.336848', 'step': 10174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:03.391072', 'step': 10174, 'epoch': 2} {'type': 'loss', 'content': 0.18403033912181854, 'timestamp': '2025-09-10 02:47:03.393049', 'step': 10175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:03.448273', 'step': 10175, 'epoch': 2} {'type': 'loss', 'content': 0.11652328819036484, 'timestamp': '2025-09-10 02:47:03.454452', 'step': 10176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:03.509282', 'step': 10176, 'epoch': 2} {'type': 'loss', 'content': 0.09396533668041229, 'timestamp': '2025-09-10 02:47:03.512202', 'step': 10177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:03.569591', 'step': 10177, 'epoch': 2} {'type': 'loss', 'content': 0.16806799173355103, 'timestamp': '2025-09-10 02:47:03.577031', 'step': 10178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:03.636486', 'step': 10178, 'epoch': 2} {'type': 'loss', 'content': 0.0735282376408577, 'timestamp': '2025-09-10 02:47:03.638644', 'step': 10179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:03.693578', 'step': 10179, 'epoch': 2} {'type': 'loss', 'content': 0.25601842999458313, 'timestamp': '2025-09-10 02:47:03.705743', 'step': 10180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:03.760473', 'step': 10180, 'epoch': 2} {'type': 'loss', 'content': 0.1905975490808487, 'timestamp': '2025-09-10 02:47:03.763044', 'step': 10181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:03.820409', 'step': 10181, 'epoch': 2} {'type': 'loss', 'content': 0.13449342548847198, 'timestamp': '2025-09-10 02:47:03.824193', 'step': 10182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:03.893610', 'step': 10182, 'epoch': 2} {'type': 'loss', 'content': 0.15201222896575928, 'timestamp': '2025-09-10 02:47:03.896027', 'step': 10183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:03.954699', 'step': 10183, 'epoch': 2} {'type': 'loss', 'content': 0.14660730957984924, 'timestamp': '2025-09-10 02:47:03.961006', 'step': 10184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:04.014378', 'step': 10184, 'epoch': 2} {'type': 'loss', 'content': 0.14627623558044434, 'timestamp': '2025-09-10 02:47:04.023373', 'step': 10185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:04.084224', 'step': 10185, 'epoch': 2} {'type': 'loss', 'content': 0.09506186097860336, 'timestamp': '2025-09-10 02:47:04.087803', 'step': 10186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:04.141959', 'step': 10186, 'epoch': 2} {'type': 'loss', 'content': 0.14596858620643616, 'timestamp': '2025-09-10 02:47:04.145657', 'step': 10187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:04.202855', 'step': 10187, 'epoch': 2} {'type': 'loss', 'content': 0.193075492978096, 'timestamp': '2025-09-10 02:47:04.208845', 'step': 10188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:04.261622', 'step': 10188, 'epoch': 2} {'type': 'loss', 'content': 0.10068213939666748, 'timestamp': '2025-09-10 02:47:04.263672', 'step': 10189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:04.320021', 'step': 10189, 'epoch': 2} {'type': 'loss', 'content': 0.08719980716705322, 'timestamp': '2025-09-10 02:47:04.323291', 'step': 10190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:04.377434', 'step': 10190, 'epoch': 2} {'type': 'loss', 'content': 0.25754299759864807, 'timestamp': '2025-09-10 02:47:04.379675', 'step': 10191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:04.435705', 'step': 10191, 'epoch': 2} {'type': 'loss', 'content': 0.12394943833351135, 'timestamp': '2025-09-10 02:47:04.441811', 'step': 10192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:04.496320', 'step': 10192, 'epoch': 2} {'type': 'loss', 'content': 0.1337531954050064, 'timestamp': '2025-09-10 02:47:04.498496', 'step': 10193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:04.551875', 'step': 10193, 'epoch': 2} {'type': 'loss', 'content': 0.13691069185733795, 'timestamp': '2025-09-10 02:47:04.554270', 'step': 10194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:04.615308', 'step': 10194, 'epoch': 2} {'type': 'loss', 'content': 0.14985349774360657, 'timestamp': '2025-09-10 02:47:04.618140', 'step': 10195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:04.677074', 'step': 10195, 'epoch': 2} {'type': 'loss', 'content': 0.15294818580150604, 'timestamp': '2025-09-10 02:47:04.683069', 'step': 10196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:04.743219', 'step': 10196, 'epoch': 2} {'type': 'loss', 'content': 0.1019139289855957, 'timestamp': '2025-09-10 02:47:04.745438', 'step': 10197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:04.806468', 'step': 10197, 'epoch': 2} {'type': 'loss', 'content': 0.14268961548805237, 'timestamp': '2025-09-10 02:47:04.808493', 'step': 10198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:04.861716', 'step': 10198, 'epoch': 2} {'type': 'loss', 'content': 0.0769411027431488, 'timestamp': '2025-09-10 02:47:04.863977', 'step': 10199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:04.917924', 'step': 10199, 'epoch': 2} {'type': 'loss', 'content': 0.1424243152141571, 'timestamp': '2025-09-10 02:47:04.923861', 'step': 10200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:04.977124', 'step': 10200, 'epoch': 2} {'type': 'loss', 'content': 0.1463012546300888, 'timestamp': '2025-09-10 02:47:04.979514', 'step': 10201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:05.032758', 'step': 10201, 'epoch': 2} {'type': 'loss', 'content': 0.07708683609962463, 'timestamp': '2025-09-10 02:47:05.035043', 'step': 10202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:05.089550', 'step': 10202, 'epoch': 2} {'type': 'loss', 'content': 0.10830273479223251, 'timestamp': '2025-09-10 02:47:05.091479', 'step': 10203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:47:05.144742', 'step': 10203, 'epoch': 2} {'type': 'loss', 'content': 0.11608345806598663, 'timestamp': '2025-09-10 02:47:05.150684', 'step': 10204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:05.203967', 'step': 10204, 'epoch': 2} {'type': 'loss', 'content': 0.17364026606082916, 'timestamp': '2025-09-10 02:47:05.206212', 'step': 10205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:05.263935', 'step': 10205, 'epoch': 2} {'type': 'loss', 'content': 0.1044231578707695, 'timestamp': '2025-09-10 02:47:05.266130', 'step': 10206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:05.320094', 'step': 10206, 'epoch': 2} {'type': 'loss', 'content': 0.09026104211807251, 'timestamp': '2025-09-10 02:47:05.322109', 'step': 10207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:05.376016', 'step': 10207, 'epoch': 2} {'type': 'loss', 'content': 0.09797971695661545, 'timestamp': '2025-09-10 02:47:05.381730', 'step': 10208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:05.435646', 'step': 10208, 'epoch': 2} {'type': 'loss', 'content': 0.12763911485671997, 'timestamp': '2025-09-10 02:47:05.437595', 'step': 10209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:05.490936', 'step': 10209, 'epoch': 2} {'type': 'loss', 'content': 0.09337954223155975, 'timestamp': '2025-09-10 02:47:05.492918', 'step': 10210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:05.546424', 'step': 10210, 'epoch': 2} {'type': 'loss', 'content': 0.14331656694412231, 'timestamp': '2025-09-10 02:47:05.548192', 'step': 10211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:05.601301', 'step': 10211, 'epoch': 2} {'type': 'loss', 'content': 0.07240518927574158, 'timestamp': '2025-09-10 02:47:05.607707', 'step': 10212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:47:05.660408', 'step': 10212, 'epoch': 2} {'type': 'loss', 'content': 0.09631483256816864, 'timestamp': '2025-09-10 02:47:05.664664', 'step': 10213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:05.720627', 'step': 10213, 'epoch': 2} {'type': 'loss', 'content': 0.20077729225158691, 'timestamp': '2025-09-10 02:47:05.722947', 'step': 10214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:05.779665', 'step': 10214, 'epoch': 2} {'type': 'loss', 'content': 0.163624107837677, 'timestamp': '2025-09-10 02:47:05.782196', 'step': 10215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:05.836519', 'step': 10215, 'epoch': 2} {'type': 'loss', 'content': 0.09823250770568848, 'timestamp': '2025-09-10 02:47:05.842749', 'step': 10216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:05.896652', 'step': 10216, 'epoch': 2} {'type': 'loss', 'content': 0.11435268074274063, 'timestamp': '2025-09-10 02:47:05.898572', 'step': 10217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:05.952666', 'step': 10217, 'epoch': 2} {'type': 'loss', 'content': 0.16897369921207428, 'timestamp': '2025-09-10 02:47:05.956751', 'step': 10218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:06.013328', 'step': 10218, 'epoch': 2} {'type': 'loss', 'content': 0.10699053853750229, 'timestamp': '2025-09-10 02:47:06.015185', 'step': 10219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:06.070170', 'step': 10219, 'epoch': 2} {'type': 'loss', 'content': 0.08228270709514618, 'timestamp': '2025-09-10 02:47:06.076264', 'step': 10220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:06.139490', 'step': 10220, 'epoch': 2} {'type': 'loss', 'content': 0.15225861966609955, 'timestamp': '2025-09-10 02:47:06.141808', 'step': 10221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:06.201926', 'step': 10221, 'epoch': 2} {'type': 'loss', 'content': 0.08751796185970306, 'timestamp': '2025-09-10 02:47:06.207312', 'step': 10222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:47:06.263259', 'step': 10222, 'epoch': 2} {'type': 'loss', 'content': 0.08458170294761658, 'timestamp': '2025-09-10 02:47:06.265642', 'step': 10223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:06.321397', 'step': 10223, 'epoch': 2} {'type': 'loss', 'content': 0.15873773396015167, 'timestamp': '2025-09-10 02:47:06.327627', 'step': 10224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:06.395144', 'step': 10224, 'epoch': 2} {'type': 'loss', 'content': 0.1560317873954773, 'timestamp': '2025-09-10 02:47:06.397075', 'step': 10225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:06.451300', 'step': 10225, 'epoch': 2} {'type': 'loss', 'content': 0.14743320643901825, 'timestamp': '2025-09-10 02:47:06.454356', 'step': 10226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:06.514203', 'step': 10226, 'epoch': 2} {'type': 'loss', 'content': 0.15362411737442017, 'timestamp': '2025-09-10 02:47:06.517411', 'step': 10227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:06.587146', 'step': 10227, 'epoch': 2} {'type': 'loss', 'content': 0.11972545087337494, 'timestamp': '2025-09-10 02:47:06.593677', 'step': 10228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:06.650631', 'step': 10228, 'epoch': 2} {'type': 'loss', 'content': 0.12805578112602234, 'timestamp': '2025-09-10 02:47:06.653794', 'step': 10229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:06.709735', 'step': 10229, 'epoch': 2} {'type': 'loss', 'content': 0.19293981790542603, 'timestamp': '2025-09-10 02:47:06.711954', 'step': 10230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:06.770471', 'step': 10230, 'epoch': 2} {'type': 'loss', 'content': 0.07066185027360916, 'timestamp': '2025-09-10 02:47:06.772816', 'step': 10231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:06.826930', 'step': 10231, 'epoch': 2} {'type': 'loss', 'content': 0.21633006632328033, 'timestamp': '2025-09-10 02:47:06.833091', 'step': 10232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:06.886836', 'step': 10232, 'epoch': 2} {'type': 'loss', 'content': 0.0623074471950531, 'timestamp': '2025-09-10 02:47:06.888649', 'step': 10233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:06.942979', 'step': 10233, 'epoch': 2} {'type': 'loss', 'content': 0.16915085911750793, 'timestamp': '2025-09-10 02:47:06.945348', 'step': 10234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:06.999469', 'step': 10234, 'epoch': 2} {'type': 'loss', 'content': 0.1326798051595688, 'timestamp': '2025-09-10 02:47:07.001629', 'step': 10235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:07.055286', 'step': 10235, 'epoch': 2} {'type': 'loss', 'content': 0.08536136150360107, 'timestamp': '2025-09-10 02:47:07.061595', 'step': 10236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:07.115014', 'step': 10236, 'epoch': 2} {'type': 'loss', 'content': 0.12162550538778305, 'timestamp': '2025-09-10 02:47:07.117323', 'step': 10237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:07.172762', 'step': 10237, 'epoch': 2} {'type': 'loss', 'content': 0.11315029114484787, 'timestamp': '2025-09-10 02:47:07.178306', 'step': 10238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:07.234061', 'step': 10238, 'epoch': 2} {'type': 'loss', 'content': 0.09160474687814713, 'timestamp': '2025-09-10 02:47:07.235940', 'step': 10239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:07.289915', 'step': 10239, 'epoch': 2} {'type': 'loss', 'content': 0.1924021691083908, 'timestamp': '2025-09-10 02:47:07.295737', 'step': 10240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:07.349202', 'step': 10240, 'epoch': 2} {'type': 'loss', 'content': 0.1335277557373047, 'timestamp': '2025-09-10 02:47:07.351116', 'step': 10241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:07.407025', 'step': 10241, 'epoch': 2} {'type': 'loss', 'content': 0.15775465965270996, 'timestamp': '2025-09-10 02:47:07.408880', 'step': 10242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:07.462206', 'step': 10242, 'epoch': 2} {'type': 'loss', 'content': 0.1523894965648651, 'timestamp': '2025-09-10 02:47:07.464551', 'step': 10243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:07.518316', 'step': 10243, 'epoch': 2} {'type': 'loss', 'content': 0.17438271641731262, 'timestamp': '2025-09-10 02:47:07.524419', 'step': 10244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:07.577648', 'step': 10244, 'epoch': 2} {'type': 'loss', 'content': 0.047831013798713684, 'timestamp': '2025-09-10 02:47:07.579923', 'step': 10245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:07.636903', 'step': 10245, 'epoch': 2} {'type': 'loss', 'content': 0.2279059737920761, 'timestamp': '2025-09-10 02:47:07.639138', 'step': 10246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:07.692854', 'step': 10246, 'epoch': 2} {'type': 'loss', 'content': 0.14603795111179352, 'timestamp': '2025-09-10 02:47:07.695024', 'step': 10247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:07.748665', 'step': 10247, 'epoch': 2} {'type': 'loss', 'content': 0.15356621146202087, 'timestamp': '2025-09-10 02:47:07.754398', 'step': 10248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:07.810108', 'step': 10248, 'epoch': 2} {'type': 'loss', 'content': 0.11848225444555283, 'timestamp': '2025-09-10 02:47:07.811930', 'step': 10249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:07.866013', 'step': 10249, 'epoch': 2} {'type': 'loss', 'content': 0.17738842964172363, 'timestamp': '2025-09-10 02:47:07.867960', 'step': 10250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:07.922344', 'step': 10250, 'epoch': 2} {'type': 'loss', 'content': 0.09541032463312149, 'timestamp': '2025-09-10 02:47:07.924260', 'step': 10251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:07.981528', 'step': 10251, 'epoch': 2} {'type': 'loss', 'content': 0.3025933802127838, 'timestamp': '2025-09-10 02:47:07.987947', 'step': 10252, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:47:20.872086', 'step': 10252, 'epoch': 2} {'type': 'pplx', 'content': 13183.852189968551, 'timestamp': '2025-09-10 02:47:20.875527', 'step': 10252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:20.930799', 'step': 10252, 'epoch': 2} {'type': 'loss', 'content': 0.11283880472183228, 'timestamp': '2025-09-10 02:47:20.933194', 'step': 10253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:20.989032', 'step': 10253, 'epoch': 2} {'type': 'loss', 'content': 0.14623823761940002, 'timestamp': '2025-09-10 02:47:20.991392', 'step': 10254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:21.051481', 'step': 10254, 'epoch': 2} {'type': 'loss', 'content': 0.1022934839129448, 'timestamp': '2025-09-10 02:47:21.053406', 'step': 10255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:21.110328', 'step': 10255, 'epoch': 2} {'type': 'loss', 'content': 0.11167246848344803, 'timestamp': '2025-09-10 02:47:21.116460', 'step': 10256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:21.171236', 'step': 10256, 'epoch': 2} {'type': 'loss', 'content': 0.14068004488945007, 'timestamp': '2025-09-10 02:47:21.173272', 'step': 10257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:21.226628', 'step': 10257, 'epoch': 2} {'type': 'loss', 'content': 0.14417529106140137, 'timestamp': '2025-09-10 02:47:21.228845', 'step': 10258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:21.282013', 'step': 10258, 'epoch': 2} {'type': 'loss', 'content': 0.059008195996284485, 'timestamp': '2025-09-10 02:47:21.284078', 'step': 10259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:21.338410', 'step': 10259, 'epoch': 2} {'type': 'loss', 'content': 0.10421763360500336, 'timestamp': '2025-09-10 02:47:21.344719', 'step': 10260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:47:21.397786', 'step': 10260, 'epoch': 2} {'type': 'loss', 'content': 0.07005380094051361, 'timestamp': '2025-09-10 02:47:21.399914', 'step': 10261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:21.453276', 'step': 10261, 'epoch': 2} {'type': 'loss', 'content': 0.11433439701795578, 'timestamp': '2025-09-10 02:47:21.455069', 'step': 10262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:47:21.508726', 'step': 10262, 'epoch': 2} {'type': 'loss', 'content': 0.06469973176717758, 'timestamp': '2025-09-10 02:47:21.510643', 'step': 10263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:21.564057', 'step': 10263, 'epoch': 2} {'type': 'loss', 'content': 0.16552351415157318, 'timestamp': '2025-09-10 02:47:21.570223', 'step': 10264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:21.624470', 'step': 10264, 'epoch': 2} {'type': 'loss', 'content': 0.1683133840560913, 'timestamp': '2025-09-10 02:47:21.626478', 'step': 10265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:21.679568', 'step': 10265, 'epoch': 2} {'type': 'loss', 'content': 0.10164754092693329, 'timestamp': '2025-09-10 02:47:21.683140', 'step': 10266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:21.737172', 'step': 10266, 'epoch': 2} {'type': 'loss', 'content': 0.1976180523633957, 'timestamp': '2025-09-10 02:47:21.739982', 'step': 10267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:21.793969', 'step': 10267, 'epoch': 2} {'type': 'loss', 'content': 0.1728973388671875, 'timestamp': '2025-09-10 02:47:21.800157', 'step': 10268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:21.852615', 'step': 10268, 'epoch': 2} {'type': 'loss', 'content': 0.2966473400592804, 'timestamp': '2025-09-10 02:47:21.854849', 'step': 10269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:21.909567', 'step': 10269, 'epoch': 2} {'type': 'loss', 'content': 0.13454735279083252, 'timestamp': '2025-09-10 02:47:21.911719', 'step': 10270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:21.970157', 'step': 10270, 'epoch': 2} {'type': 'loss', 'content': 0.07980307191610336, 'timestamp': '2025-09-10 02:47:21.972600', 'step': 10271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:22.029897', 'step': 10271, 'epoch': 2} {'type': 'loss', 'content': 0.031094836071133614, 'timestamp': '2025-09-10 02:47:22.036303', 'step': 10272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:22.090126', 'step': 10272, 'epoch': 2} {'type': 'loss', 'content': 0.13960477709770203, 'timestamp': '2025-09-10 02:47:22.093984', 'step': 10273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:22.149123', 'step': 10273, 'epoch': 2} {'type': 'loss', 'content': 0.08259546011686325, 'timestamp': '2025-09-10 02:47:22.151511', 'step': 10274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:22.216018', 'step': 10274, 'epoch': 2} {'type': 'loss', 'content': 0.14647774398326874, 'timestamp': '2025-09-10 02:47:22.218491', 'step': 10275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:22.272846', 'step': 10275, 'epoch': 2} {'type': 'loss', 'content': 0.13886845111846924, 'timestamp': '2025-09-10 02:47:22.279337', 'step': 10276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:22.334273', 'step': 10276, 'epoch': 2} {'type': 'loss', 'content': 0.10229948163032532, 'timestamp': '2025-09-10 02:47:22.336672', 'step': 10277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:22.394904', 'step': 10277, 'epoch': 2} {'type': 'loss', 'content': 0.09492041170597076, 'timestamp': '2025-09-10 02:47:22.397320', 'step': 10278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:22.453171', 'step': 10278, 'epoch': 2} {'type': 'loss', 'content': 0.2815797030925751, 'timestamp': '2025-09-10 02:47:22.455449', 'step': 10279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:22.509971', 'step': 10279, 'epoch': 2} {'type': 'loss', 'content': 0.16017811000347137, 'timestamp': '2025-09-10 02:47:22.516098', 'step': 10280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:22.569167', 'step': 10280, 'epoch': 2} {'type': 'loss', 'content': 0.13446269929409027, 'timestamp': '2025-09-10 02:47:22.571318', 'step': 10281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:22.624663', 'step': 10281, 'epoch': 2} {'type': 'loss', 'content': 0.2077476680278778, 'timestamp': '2025-09-10 02:47:22.628462', 'step': 10282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:22.683609', 'step': 10282, 'epoch': 2} {'type': 'loss', 'content': 0.1205528974533081, 'timestamp': '2025-09-10 02:47:22.686125', 'step': 10283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:22.744181', 'step': 10283, 'epoch': 2} {'type': 'loss', 'content': 0.056988269090652466, 'timestamp': '2025-09-10 02:47:22.750476', 'step': 10284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:22.804306', 'step': 10284, 'epoch': 2} {'type': 'loss', 'content': 0.1260339766740799, 'timestamp': '2025-09-10 02:47:22.806384', 'step': 10285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:22.861771', 'step': 10285, 'epoch': 2} {'type': 'loss', 'content': 0.14340274035930634, 'timestamp': '2025-09-10 02:47:22.863975', 'step': 10286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:22.923563', 'step': 10286, 'epoch': 2} {'type': 'loss', 'content': 0.13538804650306702, 'timestamp': '2025-09-10 02:47:22.925897', 'step': 10287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:22.984005', 'step': 10287, 'epoch': 2} {'type': 'loss', 'content': 0.06941339373588562, 'timestamp': '2025-09-10 02:47:22.991756', 'step': 10288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:23.047763', 'step': 10288, 'epoch': 2} {'type': 'loss', 'content': 0.06896086782217026, 'timestamp': '2025-09-10 02:47:23.050015', 'step': 10289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:23.104240', 'step': 10289, 'epoch': 2} {'type': 'loss', 'content': 0.2029799073934555, 'timestamp': '2025-09-10 02:47:23.106483', 'step': 10290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:23.159545', 'step': 10290, 'epoch': 2} {'type': 'loss', 'content': 0.145351842045784, 'timestamp': '2025-09-10 02:47:23.161946', 'step': 10291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:23.215166', 'step': 10291, 'epoch': 2} {'type': 'loss', 'content': 0.11935020238161087, 'timestamp': '2025-09-10 02:47:23.223838', 'step': 10292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:23.276680', 'step': 10292, 'epoch': 2} {'type': 'loss', 'content': 0.09124525636434555, 'timestamp': '2025-09-10 02:47:23.278823', 'step': 10293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:23.332809', 'step': 10293, 'epoch': 2} {'type': 'loss', 'content': 0.098612941801548, 'timestamp': '2025-09-10 02:47:23.337189', 'step': 10294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:23.391891', 'step': 10294, 'epoch': 2} {'type': 'loss', 'content': 0.06537485122680664, 'timestamp': '2025-09-10 02:47:23.394065', 'step': 10295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:23.447398', 'step': 10295, 'epoch': 2} {'type': 'loss', 'content': 0.11996173113584518, 'timestamp': '2025-09-10 02:47:23.453467', 'step': 10296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:23.506353', 'step': 10296, 'epoch': 2} {'type': 'loss', 'content': 0.16541524231433868, 'timestamp': '2025-09-10 02:47:23.508443', 'step': 10297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:23.564719', 'step': 10297, 'epoch': 2} {'type': 'loss', 'content': 0.17304834723472595, 'timestamp': '2025-09-10 02:47:23.566994', 'step': 10298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:23.623049', 'step': 10298, 'epoch': 2} {'type': 'loss', 'content': 0.09504791349172592, 'timestamp': '2025-09-10 02:47:23.625205', 'step': 10299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:23.678176', 'step': 10299, 'epoch': 2} {'type': 'loss', 'content': 0.14349262416362762, 'timestamp': '2025-09-10 02:47:23.684419', 'step': 10300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:23.737062', 'step': 10300, 'epoch': 2} {'type': 'loss', 'content': 0.23378048837184906, 'timestamp': '2025-09-10 02:47:23.739363', 'step': 10301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:23.792937', 'step': 10301, 'epoch': 2} {'type': 'loss', 'content': 0.10946948081254959, 'timestamp': '2025-09-10 02:47:23.795433', 'step': 10302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:23.849170', 'step': 10302, 'epoch': 2} {'type': 'loss', 'content': 0.11046208441257477, 'timestamp': '2025-09-10 02:47:23.851435', 'step': 10303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:23.904461', 'step': 10303, 'epoch': 2} {'type': 'loss', 'content': 0.1329287588596344, 'timestamp': '2025-09-10 02:47:23.910600', 'step': 10304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:23.964949', 'step': 10304, 'epoch': 2} {'type': 'loss', 'content': 0.08144683390855789, 'timestamp': '2025-09-10 02:47:23.967295', 'step': 10305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:24.020809', 'step': 10305, 'epoch': 2} {'type': 'loss', 'content': 0.1512327939271927, 'timestamp': '2025-09-10 02:47:24.023217', 'step': 10306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:24.076857', 'step': 10306, 'epoch': 2} {'type': 'loss', 'content': 0.09859238564968109, 'timestamp': '2025-09-10 02:47:24.079100', 'step': 10307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:24.133931', 'step': 10307, 'epoch': 2} {'type': 'loss', 'content': 0.08975028246641159, 'timestamp': '2025-09-10 02:47:24.140076', 'step': 10308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:24.196586', 'step': 10308, 'epoch': 2} {'type': 'loss', 'content': 0.1252768486738205, 'timestamp': '2025-09-10 02:47:24.198639', 'step': 10309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:24.251732', 'step': 10309, 'epoch': 2} {'type': 'loss', 'content': 0.10239134728908539, 'timestamp': '2025-09-10 02:47:24.254034', 'step': 10310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:24.308177', 'step': 10310, 'epoch': 2} {'type': 'loss', 'content': 0.11552523076534271, 'timestamp': '2025-09-10 02:47:24.310493', 'step': 10311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:24.363515', 'step': 10311, 'epoch': 2} {'type': 'loss', 'content': 0.0984046459197998, 'timestamp': '2025-09-10 02:47:24.369591', 'step': 10312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:24.423852', 'step': 10312, 'epoch': 2} {'type': 'loss', 'content': 0.10195634514093399, 'timestamp': '2025-09-10 02:47:24.426347', 'step': 10313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:24.480292', 'step': 10313, 'epoch': 2} {'type': 'loss', 'content': 0.10755506157875061, 'timestamp': '2025-09-10 02:47:24.482611', 'step': 10314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:24.535884', 'step': 10314, 'epoch': 2} {'type': 'loss', 'content': 0.14810027182102203, 'timestamp': '2025-09-10 02:47:24.538144', 'step': 10315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:24.591921', 'step': 10315, 'epoch': 2} {'type': 'loss', 'content': 0.15236985683441162, 'timestamp': '2025-09-10 02:47:24.598128', 'step': 10316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:24.652152', 'step': 10316, 'epoch': 2} {'type': 'loss', 'content': 0.19099758565425873, 'timestamp': '2025-09-10 02:47:24.654461', 'step': 10317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:24.707624', 'step': 10317, 'epoch': 2} {'type': 'loss', 'content': 0.1323591023683548, 'timestamp': '2025-09-10 02:47:24.710537', 'step': 10318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:24.763301', 'step': 10318, 'epoch': 2} {'type': 'loss', 'content': 0.20869147777557373, 'timestamp': '2025-09-10 02:47:24.765539', 'step': 10319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:24.818665', 'step': 10319, 'epoch': 2} {'type': 'loss', 'content': 0.20014172792434692, 'timestamp': '2025-09-10 02:47:24.824582', 'step': 10320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:24.877313', 'step': 10320, 'epoch': 2} {'type': 'loss', 'content': 0.10669146478176117, 'timestamp': '2025-09-10 02:47:24.879514', 'step': 10321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:24.933096', 'step': 10321, 'epoch': 2} {'type': 'loss', 'content': 0.16677775979042053, 'timestamp': '2025-09-10 02:47:24.935136', 'step': 10322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:24.988851', 'step': 10322, 'epoch': 2} {'type': 'loss', 'content': 0.040231578052043915, 'timestamp': '2025-09-10 02:47:24.991308', 'step': 10323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:25.044323', 'step': 10323, 'epoch': 2} {'type': 'loss', 'content': 0.08315613865852356, 'timestamp': '2025-09-10 02:47:25.050022', 'step': 10324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:25.102869', 'step': 10324, 'epoch': 2} {'type': 'loss', 'content': 0.13730663061141968, 'timestamp': '2025-09-10 02:47:25.105110', 'step': 10325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:25.158913', 'step': 10325, 'epoch': 2} {'type': 'loss', 'content': 0.1467205286026001, 'timestamp': '2025-09-10 02:47:25.161263', 'step': 10326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:25.215544', 'step': 10326, 'epoch': 2} {'type': 'loss', 'content': 0.13669893145561218, 'timestamp': '2025-09-10 02:47:25.217885', 'step': 10327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:25.271757', 'step': 10327, 'epoch': 2} {'type': 'loss', 'content': 0.12373904138803482, 'timestamp': '2025-09-10 02:47:25.277861', 'step': 10328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:25.331201', 'step': 10328, 'epoch': 2} {'type': 'loss', 'content': 0.1653209626674652, 'timestamp': '2025-09-10 02:47:25.333317', 'step': 10329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:25.386773', 'step': 10329, 'epoch': 2} {'type': 'loss', 'content': 0.06931998580694199, 'timestamp': '2025-09-10 02:47:25.389189', 'step': 10330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:25.442526', 'step': 10330, 'epoch': 2} {'type': 'loss', 'content': 0.11008428782224655, 'timestamp': '2025-09-10 02:47:25.444705', 'step': 10331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:25.498434', 'step': 10331, 'epoch': 2} {'type': 'loss', 'content': 0.06766480952501297, 'timestamp': '2025-09-10 02:47:25.504619', 'step': 10332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:25.558141', 'step': 10332, 'epoch': 2} {'type': 'loss', 'content': 0.17986717820167542, 'timestamp': '2025-09-10 02:47:25.560605', 'step': 10333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:25.613830', 'step': 10333, 'epoch': 2} {'type': 'loss', 'content': 0.09519754350185394, 'timestamp': '2025-09-10 02:47:25.615948', 'step': 10334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:47:25.669456', 'step': 10334, 'epoch': 2} {'type': 'loss', 'content': 0.12164794653654099, 'timestamp': '2025-09-10 02:47:25.671816', 'step': 10335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:25.724727', 'step': 10335, 'epoch': 2} {'type': 'loss', 'content': 0.17836979031562805, 'timestamp': '2025-09-10 02:47:25.730791', 'step': 10336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:25.784626', 'step': 10336, 'epoch': 2} {'type': 'loss', 'content': 0.17782337963581085, 'timestamp': '2025-09-10 02:47:25.787018', 'step': 10337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:25.839960', 'step': 10337, 'epoch': 2} {'type': 'loss', 'content': 0.12635308504104614, 'timestamp': '2025-09-10 02:47:25.842361', 'step': 10338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:25.895464', 'step': 10338, 'epoch': 2} {'type': 'loss', 'content': 0.11955089867115021, 'timestamp': '2025-09-10 02:47:25.897848', 'step': 10339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:25.951164', 'step': 10339, 'epoch': 2} {'type': 'loss', 'content': 0.12064026296138763, 'timestamp': '2025-09-10 02:47:25.957240', 'step': 10340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:47:26.010357', 'step': 10340, 'epoch': 2} {'type': 'loss', 'content': 0.1316169798374176, 'timestamp': '2025-09-10 02:47:26.012741', 'step': 10341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:26.066087', 'step': 10341, 'epoch': 2} {'type': 'loss', 'content': 0.19105543196201324, 'timestamp': '2025-09-10 02:47:26.068288', 'step': 10342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:26.122312', 'step': 10342, 'epoch': 2} {'type': 'loss', 'content': 0.09750572592020035, 'timestamp': '2025-09-10 02:47:26.124460', 'step': 10343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:26.177552', 'step': 10343, 'epoch': 2} {'type': 'loss', 'content': 0.20303666591644287, 'timestamp': '2025-09-10 02:47:26.183771', 'step': 10344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:26.236985', 'step': 10344, 'epoch': 2} {'type': 'loss', 'content': 0.06529153138399124, 'timestamp': '2025-09-10 02:47:26.239077', 'step': 10345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:26.292097', 'step': 10345, 'epoch': 2} {'type': 'loss', 'content': 0.039150457829236984, 'timestamp': '2025-09-10 02:47:26.294523', 'step': 10346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:26.348466', 'step': 10346, 'epoch': 2} {'type': 'loss', 'content': 0.12117859721183777, 'timestamp': '2025-09-10 02:47:26.350702', 'step': 10347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:26.403961', 'step': 10347, 'epoch': 2} {'type': 'loss', 'content': 0.09720579534769058, 'timestamp': '2025-09-10 02:47:26.409963', 'step': 10348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:26.462522', 'step': 10348, 'epoch': 2} {'type': 'loss', 'content': 0.1661635935306549, 'timestamp': '2025-09-10 02:47:26.464857', 'step': 10349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:26.520427', 'step': 10349, 'epoch': 2} {'type': 'loss', 'content': 0.10523532330989838, 'timestamp': '2025-09-10 02:47:26.522553', 'step': 10350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:26.577919', 'step': 10350, 'epoch': 2} {'type': 'loss', 'content': 0.07828492671251297, 'timestamp': '2025-09-10 02:47:26.580117', 'step': 10351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:47:26.633432', 'step': 10351, 'epoch': 2} {'type': 'loss', 'content': 0.0813748836517334, 'timestamp': '2025-09-10 02:47:26.639689', 'step': 10352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:26.692334', 'step': 10352, 'epoch': 2} {'type': 'loss', 'content': 0.09387646615505219, 'timestamp': '2025-09-10 02:47:26.694669', 'step': 10353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:26.749896', 'step': 10353, 'epoch': 2} {'type': 'loss', 'content': 0.11424608528614044, 'timestamp': '2025-09-10 02:47:26.752191', 'step': 10354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:26.806302', 'step': 10354, 'epoch': 2} {'type': 'loss', 'content': 0.09303686767816544, 'timestamp': '2025-09-10 02:47:26.808669', 'step': 10355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:26.862121', 'step': 10355, 'epoch': 2} {'type': 'loss', 'content': 0.15084753930568695, 'timestamp': '2025-09-10 02:47:26.868556', 'step': 10356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:26.922211', 'step': 10356, 'epoch': 2} {'type': 'loss', 'content': 0.05405411124229431, 'timestamp': '2025-09-10 02:47:26.924375', 'step': 10357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:26.977619', 'step': 10357, 'epoch': 2} {'type': 'loss', 'content': 0.12164513021707535, 'timestamp': '2025-09-10 02:47:26.979743', 'step': 10358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:27.033074', 'step': 10358, 'epoch': 2} {'type': 'loss', 'content': 0.2061389684677124, 'timestamp': '2025-09-10 02:47:27.035068', 'step': 10359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:27.089602', 'step': 10359, 'epoch': 2} {'type': 'loss', 'content': 0.117983877658844, 'timestamp': '2025-09-10 02:47:27.095965', 'step': 10360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:27.149846', 'step': 10360, 'epoch': 2} {'type': 'loss', 'content': 0.11901680380105972, 'timestamp': '2025-09-10 02:47:27.152188', 'step': 10361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:27.206745', 'step': 10361, 'epoch': 2} {'type': 'loss', 'content': 0.1046815738081932, 'timestamp': '2025-09-10 02:47:27.209123', 'step': 10362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:27.263367', 'step': 10362, 'epoch': 2} {'type': 'loss', 'content': 0.17707686126232147, 'timestamp': '2025-09-10 02:47:27.265357', 'step': 10363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:27.318389', 'step': 10363, 'epoch': 2} {'type': 'loss', 'content': 0.04530404880642891, 'timestamp': '2025-09-10 02:47:27.324649', 'step': 10364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:27.377187', 'step': 10364, 'epoch': 2} {'type': 'loss', 'content': 0.1323850452899933, 'timestamp': '2025-09-10 02:47:27.379290', 'step': 10365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:27.432267', 'step': 10365, 'epoch': 2} {'type': 'loss', 'content': 0.19778841733932495, 'timestamp': '2025-09-10 02:47:27.434549', 'step': 10366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:27.488883', 'step': 10366, 'epoch': 2} {'type': 'loss', 'content': 0.08039948344230652, 'timestamp': '2025-09-10 02:47:27.491410', 'step': 10367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:27.545309', 'step': 10367, 'epoch': 2} {'type': 'loss', 'content': 0.13255243003368378, 'timestamp': '2025-09-10 02:47:27.551429', 'step': 10368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:27.604477', 'step': 10368, 'epoch': 2} {'type': 'loss', 'content': 0.10933230817317963, 'timestamp': '2025-09-10 02:47:27.606829', 'step': 10369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:27.659917', 'step': 10369, 'epoch': 2} {'type': 'loss', 'content': 0.0854915976524353, 'timestamp': '2025-09-10 02:47:27.662130', 'step': 10370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:27.715091', 'step': 10370, 'epoch': 2} {'type': 'loss', 'content': 0.11021439731121063, 'timestamp': '2025-09-10 02:47:27.717191', 'step': 10371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:27.770646', 'step': 10371, 'epoch': 2} {'type': 'loss', 'content': 0.165274977684021, 'timestamp': '2025-09-10 02:47:27.776763', 'step': 10372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:27.829897', 'step': 10372, 'epoch': 2} {'type': 'loss', 'content': 0.10664503276348114, 'timestamp': '2025-09-10 02:47:27.833608', 'step': 10373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:27.887859', 'step': 10373, 'epoch': 2} {'type': 'loss', 'content': 0.11645951122045517, 'timestamp': '2025-09-10 02:47:27.890052', 'step': 10374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:27.945144', 'step': 10374, 'epoch': 2} {'type': 'loss', 'content': 0.13149484992027283, 'timestamp': '2025-09-10 02:47:27.947402', 'step': 10375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:28.001977', 'step': 10375, 'epoch': 2} {'type': 'loss', 'content': 0.13115626573562622, 'timestamp': '2025-09-10 02:47:28.008375', 'step': 10376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:28.061301', 'step': 10376, 'epoch': 2} {'type': 'loss', 'content': 0.08755815029144287, 'timestamp': '2025-09-10 02:47:28.063714', 'step': 10377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:28.116848', 'step': 10377, 'epoch': 2} {'type': 'loss', 'content': 0.1299227476119995, 'timestamp': '2025-09-10 02:47:28.118963', 'step': 10378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:28.172650', 'step': 10378, 'epoch': 2} {'type': 'loss', 'content': 0.1373795121908188, 'timestamp': '2025-09-10 02:47:28.175014', 'step': 10379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:28.228241', 'step': 10379, 'epoch': 2} {'type': 'loss', 'content': 0.185288205742836, 'timestamp': '2025-09-10 02:47:28.234695', 'step': 10380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:28.287853', 'step': 10380, 'epoch': 2} {'type': 'loss', 'content': 0.15659460425376892, 'timestamp': '2025-09-10 02:47:28.290274', 'step': 10381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:28.344335', 'step': 10381, 'epoch': 2} {'type': 'loss', 'content': 0.09444994479417801, 'timestamp': '2025-09-10 02:47:28.346682', 'step': 10382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:28.401217', 'step': 10382, 'epoch': 2} {'type': 'loss', 'content': 0.11356185376644135, 'timestamp': '2025-09-10 02:47:28.403601', 'step': 10383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:47:28.456807', 'step': 10383, 'epoch': 2} {'type': 'loss', 'content': 0.1379428505897522, 'timestamp': '2025-09-10 02:47:28.463287', 'step': 10384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:28.516829', 'step': 10384, 'epoch': 2} {'type': 'loss', 'content': 0.18108300864696503, 'timestamp': '2025-09-10 02:47:28.519392', 'step': 10385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:28.573463', 'step': 10385, 'epoch': 2} {'type': 'loss', 'content': 0.15860024094581604, 'timestamp': '2025-09-10 02:47:28.576188', 'step': 10386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:28.629789', 'step': 10386, 'epoch': 2} {'type': 'loss', 'content': 0.04274146631360054, 'timestamp': '2025-09-10 02:47:28.632071', 'step': 10387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:28.685818', 'step': 10387, 'epoch': 2} {'type': 'loss', 'content': 0.12911416590213776, 'timestamp': '2025-09-10 02:47:28.692063', 'step': 10388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:28.744744', 'step': 10388, 'epoch': 2} {'type': 'loss', 'content': 0.18146218359470367, 'timestamp': '2025-09-10 02:47:28.747224', 'step': 10389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:28.802548', 'step': 10389, 'epoch': 2} {'type': 'loss', 'content': 0.11667529493570328, 'timestamp': '2025-09-10 02:47:28.805068', 'step': 10390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:28.861181', 'step': 10390, 'epoch': 2} {'type': 'loss', 'content': 0.1072423905134201, 'timestamp': '2025-09-10 02:47:28.863593', 'step': 10391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:28.917076', 'step': 10391, 'epoch': 2} {'type': 'loss', 'content': 0.14450019598007202, 'timestamp': '2025-09-10 02:47:28.923272', 'step': 10392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:28.976025', 'step': 10392, 'epoch': 2} {'type': 'loss', 'content': 0.13899074494838715, 'timestamp': '2025-09-10 02:47:28.978367', 'step': 10393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:29.032055', 'step': 10393, 'epoch': 2} {'type': 'loss', 'content': 0.2022700160741806, 'timestamp': '2025-09-10 02:47:29.034419', 'step': 10394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:29.088647', 'step': 10394, 'epoch': 2} {'type': 'loss', 'content': 0.1040644571185112, 'timestamp': '2025-09-10 02:47:29.091030', 'step': 10395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:29.147908', 'step': 10395, 'epoch': 2} {'type': 'loss', 'content': 0.17901460826396942, 'timestamp': '2025-09-10 02:47:29.154372', 'step': 10396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:29.207571', 'step': 10396, 'epoch': 2} {'type': 'loss', 'content': 0.1795680969953537, 'timestamp': '2025-09-10 02:47:29.209842', 'step': 10397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:29.263417', 'step': 10397, 'epoch': 2} {'type': 'loss', 'content': 0.21211101114749908, 'timestamp': '2025-09-10 02:47:29.265737', 'step': 10398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:29.319417', 'step': 10398, 'epoch': 2} {'type': 'loss', 'content': 0.1557350605726242, 'timestamp': '2025-09-10 02:47:29.321715', 'step': 10399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:29.375166', 'step': 10399, 'epoch': 2} {'type': 'loss', 'content': 0.15221507847309113, 'timestamp': '2025-09-10 02:47:29.381360', 'step': 10400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:29.435106', 'step': 10400, 'epoch': 2} {'type': 'loss', 'content': 0.1524774581193924, 'timestamp': '2025-09-10 02:47:29.437470', 'step': 10401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:29.490900', 'step': 10401, 'epoch': 2} {'type': 'loss', 'content': 0.0975356176495552, 'timestamp': '2025-09-10 02:47:29.493202', 'step': 10402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:29.547096', 'step': 10402, 'epoch': 2} {'type': 'loss', 'content': 0.10870833694934845, 'timestamp': '2025-09-10 02:47:29.549548', 'step': 10403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:29.604432', 'step': 10403, 'epoch': 2} {'type': 'loss', 'content': 0.07125931233167648, 'timestamp': '2025-09-10 02:47:29.610778', 'step': 10404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:29.664209', 'step': 10404, 'epoch': 2} {'type': 'loss', 'content': 0.1327458769083023, 'timestamp': '2025-09-10 02:47:29.666584', 'step': 10405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:29.722656', 'step': 10405, 'epoch': 2} {'type': 'loss', 'content': 0.11265616863965988, 'timestamp': '2025-09-10 02:47:29.724957', 'step': 10406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:29.779546', 'step': 10406, 'epoch': 2} {'type': 'loss', 'content': 0.09552454203367233, 'timestamp': '2025-09-10 02:47:29.781842', 'step': 10407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:29.836023', 'step': 10407, 'epoch': 2} {'type': 'loss', 'content': 0.1143832877278328, 'timestamp': '2025-09-10 02:47:29.842409', 'step': 10408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:29.896204', 'step': 10408, 'epoch': 2} {'type': 'loss', 'content': 0.07572820782661438, 'timestamp': '2025-09-10 02:47:29.898578', 'step': 10409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:29.952059', 'step': 10409, 'epoch': 2} {'type': 'loss', 'content': 0.0963490828871727, 'timestamp': '2025-09-10 02:47:29.954483', 'step': 10410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:30.008017', 'step': 10410, 'epoch': 2} {'type': 'loss', 'content': 0.14131799340248108, 'timestamp': '2025-09-10 02:47:30.010461', 'step': 10411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:30.064006', 'step': 10411, 'epoch': 2} {'type': 'loss', 'content': 0.2558886408805847, 'timestamp': '2025-09-10 02:47:30.070283', 'step': 10412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:30.125594', 'step': 10412, 'epoch': 2} {'type': 'loss', 'content': 0.1053699254989624, 'timestamp': '2025-09-10 02:47:30.127812', 'step': 10413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:30.181058', 'step': 10413, 'epoch': 2} {'type': 'loss', 'content': 0.11466459184885025, 'timestamp': '2025-09-10 02:47:30.183462', 'step': 10414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:30.236615', 'step': 10414, 'epoch': 2} {'type': 'loss', 'content': 0.11825539171695709, 'timestamp': '2025-09-10 02:47:30.238898', 'step': 10415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:30.292482', 'step': 10415, 'epoch': 2} {'type': 'loss', 'content': 0.09009165316820145, 'timestamp': '2025-09-10 02:47:30.298608', 'step': 10416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:30.351479', 'step': 10416, 'epoch': 2} {'type': 'loss', 'content': 0.1442592889070511, 'timestamp': '2025-09-10 02:47:30.353686', 'step': 10417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:30.406638', 'step': 10417, 'epoch': 2} {'type': 'loss', 'content': 0.23310069739818573, 'timestamp': '2025-09-10 02:47:30.409145', 'step': 10418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:30.462509', 'step': 10418, 'epoch': 2} {'type': 'loss', 'content': 0.13716571033000946, 'timestamp': '2025-09-10 02:47:30.465030', 'step': 10419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:30.518594', 'step': 10419, 'epoch': 2} {'type': 'loss', 'content': 0.16489313542842865, 'timestamp': '2025-09-10 02:47:30.524450', 'step': 10420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:30.577612', 'step': 10420, 'epoch': 2} {'type': 'loss', 'content': 0.13967613875865936, 'timestamp': '2025-09-10 02:47:30.579553', 'step': 10421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:30.633983', 'step': 10421, 'epoch': 2} {'type': 'loss', 'content': 0.13592621684074402, 'timestamp': '2025-09-10 02:47:30.636523', 'step': 10422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:30.692738', 'step': 10422, 'epoch': 2} {'type': 'loss', 'content': 0.1651826947927475, 'timestamp': '2025-09-10 02:47:30.695194', 'step': 10423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:30.749047', 'step': 10423, 'epoch': 2} {'type': 'loss', 'content': 0.11365539580583572, 'timestamp': '2025-09-10 02:47:30.755343', 'step': 10424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:30.813793', 'step': 10424, 'epoch': 2} {'type': 'loss', 'content': 0.15406973659992218, 'timestamp': '2025-09-10 02:47:30.816160', 'step': 10425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:30.874449', 'step': 10425, 'epoch': 2} {'type': 'loss', 'content': 0.14966541528701782, 'timestamp': '2025-09-10 02:47:30.876835', 'step': 10426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:30.930291', 'step': 10426, 'epoch': 2} {'type': 'loss', 'content': 0.13062673807144165, 'timestamp': '2025-09-10 02:47:30.932537', 'step': 10427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:30.986188', 'step': 10427, 'epoch': 2} {'type': 'loss', 'content': 0.0809975415468216, 'timestamp': '2025-09-10 02:47:30.992392', 'step': 10428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:31.045677', 'step': 10428, 'epoch': 2} {'type': 'loss', 'content': 0.11466261744499207, 'timestamp': '2025-09-10 02:47:31.048030', 'step': 10429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:31.101624', 'step': 10429, 'epoch': 2} {'type': 'loss', 'content': 0.10108686983585358, 'timestamp': '2025-09-10 02:47:31.103895', 'step': 10430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:31.157253', 'step': 10430, 'epoch': 2} {'type': 'loss', 'content': 0.17856743931770325, 'timestamp': '2025-09-10 02:47:31.159678', 'step': 10431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:31.214857', 'step': 10431, 'epoch': 2} {'type': 'loss', 'content': 0.13002154231071472, 'timestamp': '2025-09-10 02:47:31.221637', 'step': 10432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:31.276087', 'step': 10432, 'epoch': 2} {'type': 'loss', 'content': 0.13915328681468964, 'timestamp': '2025-09-10 02:47:31.278576', 'step': 10433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:31.333507', 'step': 10433, 'epoch': 2} {'type': 'loss', 'content': 0.11666469275951385, 'timestamp': '2025-09-10 02:47:31.335804', 'step': 10434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:31.389659', 'step': 10434, 'epoch': 2} {'type': 'loss', 'content': 0.13469889760017395, 'timestamp': '2025-09-10 02:47:31.391931', 'step': 10435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:31.445947', 'step': 10435, 'epoch': 2} {'type': 'loss', 'content': 0.06680098921060562, 'timestamp': '2025-09-10 02:47:31.452082', 'step': 10436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:31.505976', 'step': 10436, 'epoch': 2} {'type': 'loss', 'content': 0.14991885423660278, 'timestamp': '2025-09-10 02:47:31.508262', 'step': 10437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:31.561960', 'step': 10437, 'epoch': 2} {'type': 'loss', 'content': 0.07012931257486343, 'timestamp': '2025-09-10 02:47:31.564169', 'step': 10438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:31.618520', 'step': 10438, 'epoch': 2} {'type': 'loss', 'content': 0.059845469892024994, 'timestamp': '2025-09-10 02:47:31.620811', 'step': 10439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:31.674751', 'step': 10439, 'epoch': 2} {'type': 'loss', 'content': 0.1715790182352066, 'timestamp': '2025-09-10 02:47:31.680880', 'step': 10440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:31.735253', 'step': 10440, 'epoch': 2} {'type': 'loss', 'content': 0.09407854825258255, 'timestamp': '2025-09-10 02:47:31.737470', 'step': 10441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:31.791747', 'step': 10441, 'epoch': 2} {'type': 'loss', 'content': 0.1094169020652771, 'timestamp': '2025-09-10 02:47:31.793945', 'step': 10442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:31.848699', 'step': 10442, 'epoch': 2} {'type': 'loss', 'content': 0.11617134511470795, 'timestamp': '2025-09-10 02:47:31.850888', 'step': 10443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:31.905016', 'step': 10443, 'epoch': 2} {'type': 'loss', 'content': 0.17129868268966675, 'timestamp': '2025-09-10 02:47:31.911322', 'step': 10444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:31.964786', 'step': 10444, 'epoch': 2} {'type': 'loss', 'content': 0.08608701080083847, 'timestamp': '2025-09-10 02:47:31.966998', 'step': 10445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:32.021314', 'step': 10445, 'epoch': 2} {'type': 'loss', 'content': 0.09848801791667938, 'timestamp': '2025-09-10 02:47:32.023691', 'step': 10446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:32.078400', 'step': 10446, 'epoch': 2} {'type': 'loss', 'content': 0.06107737869024277, 'timestamp': '2025-09-10 02:47:32.080774', 'step': 10447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:32.136512', 'step': 10447, 'epoch': 2} {'type': 'loss', 'content': 0.1647021472454071, 'timestamp': '2025-09-10 02:47:32.142847', 'step': 10448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:32.196184', 'step': 10448, 'epoch': 2} {'type': 'loss', 'content': 0.1754029244184494, 'timestamp': '2025-09-10 02:47:32.198312', 'step': 10449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:32.253759', 'step': 10449, 'epoch': 2} {'type': 'loss', 'content': 0.13106589019298553, 'timestamp': '2025-09-10 02:47:32.256139', 'step': 10450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:32.310821', 'step': 10450, 'epoch': 2} {'type': 'loss', 'content': 0.11514890938997269, 'timestamp': '2025-09-10 02:47:32.313032', 'step': 10451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:32.367733', 'step': 10451, 'epoch': 2} {'type': 'loss', 'content': 0.1928924024105072, 'timestamp': '2025-09-10 02:47:32.374252', 'step': 10452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:32.429313', 'step': 10452, 'epoch': 2} {'type': 'loss', 'content': 0.17817552387714386, 'timestamp': '2025-09-10 02:47:32.431510', 'step': 10453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:32.487055', 'step': 10453, 'epoch': 2} {'type': 'loss', 'content': 0.148628368973732, 'timestamp': '2025-09-10 02:47:32.489301', 'step': 10454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:32.545068', 'step': 10454, 'epoch': 2} {'type': 'loss', 'content': 0.1471693366765976, 'timestamp': '2025-09-10 02:47:32.547221', 'step': 10455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:32.602026', 'step': 10455, 'epoch': 2} {'type': 'loss', 'content': 0.12166137248277664, 'timestamp': '2025-09-10 02:47:32.608335', 'step': 10456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:32.662336', 'step': 10456, 'epoch': 2} {'type': 'loss', 'content': 0.09973806142807007, 'timestamp': '2025-09-10 02:47:32.664588', 'step': 10457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:32.732401', 'step': 10457, 'epoch': 2} {'type': 'loss', 'content': 0.14939464628696442, 'timestamp': '2025-09-10 02:47:32.734347', 'step': 10458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:32.789488', 'step': 10458, 'epoch': 2} {'type': 'loss', 'content': 0.12708887457847595, 'timestamp': '2025-09-10 02:47:32.791724', 'step': 10459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:32.847384', 'step': 10459, 'epoch': 2} {'type': 'loss', 'content': 0.13151803612709045, 'timestamp': '2025-09-10 02:47:32.853772', 'step': 10460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:32.909559', 'step': 10460, 'epoch': 2} {'type': 'loss', 'content': 0.06856699287891388, 'timestamp': '2025-09-10 02:47:32.911787', 'step': 10461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:32.966734', 'step': 10461, 'epoch': 2} {'type': 'loss', 'content': 0.13524089753627777, 'timestamp': '2025-09-10 02:47:32.968894', 'step': 10462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:33.023830', 'step': 10462, 'epoch': 2} {'type': 'loss', 'content': 0.131080761551857, 'timestamp': '2025-09-10 02:47:33.025754', 'step': 10463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:33.079365', 'step': 10463, 'epoch': 2} {'type': 'loss', 'content': 0.09769968688488007, 'timestamp': '2025-09-10 02:47:33.085658', 'step': 10464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:33.139755', 'step': 10464, 'epoch': 2} {'type': 'loss', 'content': 0.06709633022546768, 'timestamp': '2025-09-10 02:47:33.141693', 'step': 10465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:33.195867', 'step': 10465, 'epoch': 2} {'type': 'loss', 'content': 0.12566635012626648, 'timestamp': '2025-09-10 02:47:33.198016', 'step': 10466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:33.252261', 'step': 10466, 'epoch': 2} {'type': 'loss', 'content': 0.15932078659534454, 'timestamp': '2025-09-10 02:47:33.254546', 'step': 10467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:33.308698', 'step': 10467, 'epoch': 2} {'type': 'loss', 'content': 0.04589023441076279, 'timestamp': '2025-09-10 02:47:33.315119', 'step': 10468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:33.368773', 'step': 10468, 'epoch': 2} {'type': 'loss', 'content': 0.1658903807401657, 'timestamp': '2025-09-10 02:47:33.370752', 'step': 10469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:33.424613', 'step': 10469, 'epoch': 2} {'type': 'loss', 'content': 0.2062404602766037, 'timestamp': '2025-09-10 02:47:33.426672', 'step': 10470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:33.480517', 'step': 10470, 'epoch': 2} {'type': 'loss', 'content': 0.10312113910913467, 'timestamp': '2025-09-10 02:47:33.482491', 'step': 10471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:33.536203', 'step': 10471, 'epoch': 2} {'type': 'loss', 'content': 0.036256033927202225, 'timestamp': '2025-09-10 02:47:33.542374', 'step': 10472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:33.595319', 'step': 10472, 'epoch': 2} {'type': 'loss', 'content': 0.09025153517723083, 'timestamp': '2025-09-10 02:47:33.597466', 'step': 10473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:33.651378', 'step': 10473, 'epoch': 2} {'type': 'loss', 'content': 0.09890585392713547, 'timestamp': '2025-09-10 02:47:33.653463', 'step': 10474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:33.706922', 'step': 10474, 'epoch': 2} {'type': 'loss', 'content': 0.05255037546157837, 'timestamp': '2025-09-10 02:47:33.709198', 'step': 10475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:33.763030', 'step': 10475, 'epoch': 2} {'type': 'loss', 'content': 0.14182326197624207, 'timestamp': '2025-09-10 02:47:33.769044', 'step': 10476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:33.821933', 'step': 10476, 'epoch': 2} {'type': 'loss', 'content': 0.15623202919960022, 'timestamp': '2025-09-10 02:47:33.823715', 'step': 10477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:33.877243', 'step': 10477, 'epoch': 2} {'type': 'loss', 'content': 0.17401835322380066, 'timestamp': '2025-09-10 02:47:33.879192', 'step': 10478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:33.933318', 'step': 10478, 'epoch': 2} {'type': 'loss', 'content': 0.05934284254908562, 'timestamp': '2025-09-10 02:47:33.935487', 'step': 10479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:33.991707', 'step': 10479, 'epoch': 2} {'type': 'loss', 'content': 0.10757771134376526, 'timestamp': '2025-09-10 02:47:33.998085', 'step': 10480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:34.052965', 'step': 10480, 'epoch': 2} {'type': 'loss', 'content': 0.09764761477708817, 'timestamp': '2025-09-10 02:47:34.054939', 'step': 10481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:34.109339', 'step': 10481, 'epoch': 2} {'type': 'loss', 'content': 0.12210826575756073, 'timestamp': '2025-09-10 02:47:34.112693', 'step': 10482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:34.168918', 'step': 10482, 'epoch': 2} {'type': 'loss', 'content': 0.16978870332241058, 'timestamp': '2025-09-10 02:47:34.170889', 'step': 10483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:34.225437', 'step': 10483, 'epoch': 2} {'type': 'loss', 'content': 0.09496726840734482, 'timestamp': '2025-09-10 02:47:34.231698', 'step': 10484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:34.285742', 'step': 10484, 'epoch': 2} {'type': 'loss', 'content': 0.13014565408229828, 'timestamp': '2025-09-10 02:47:34.287865', 'step': 10485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:34.342030', 'step': 10485, 'epoch': 2} {'type': 'loss', 'content': 0.03672409430146217, 'timestamp': '2025-09-10 02:47:34.344273', 'step': 10486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:34.397869', 'step': 10486, 'epoch': 2} {'type': 'loss', 'content': 0.13383501768112183, 'timestamp': '2025-09-10 02:47:34.400151', 'step': 10487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:34.454382', 'step': 10487, 'epoch': 2} {'type': 'loss', 'content': 0.17944613099098206, 'timestamp': '2025-09-10 02:47:34.460704', 'step': 10488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:34.514399', 'step': 10488, 'epoch': 2} {'type': 'loss', 'content': 0.09500780701637268, 'timestamp': '2025-09-10 02:47:34.516669', 'step': 10489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:34.570452', 'step': 10489, 'epoch': 2} {'type': 'loss', 'content': 0.15412288904190063, 'timestamp': '2025-09-10 02:47:34.572743', 'step': 10490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:34.626600', 'step': 10490, 'epoch': 2} {'type': 'loss', 'content': 0.13697724044322968, 'timestamp': '2025-09-10 02:47:34.629180', 'step': 10491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:34.684358', 'step': 10491, 'epoch': 2} {'type': 'loss', 'content': 0.13867075741291046, 'timestamp': '2025-09-10 02:47:34.690790', 'step': 10492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:34.745796', 'step': 10492, 'epoch': 2} {'type': 'loss', 'content': 0.14473538100719452, 'timestamp': '2025-09-10 02:47:34.747721', 'step': 10493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:34.802756', 'step': 10493, 'epoch': 2} {'type': 'loss', 'content': 0.14659152925014496, 'timestamp': '2025-09-10 02:47:34.805787', 'step': 10494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:34.860649', 'step': 10494, 'epoch': 2} {'type': 'loss', 'content': 0.10582268238067627, 'timestamp': '2025-09-10 02:47:34.863128', 'step': 10495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:34.918644', 'step': 10495, 'epoch': 2} {'type': 'loss', 'content': 0.14090435206890106, 'timestamp': '2025-09-10 02:47:34.925074', 'step': 10496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:34.979559', 'step': 10496, 'epoch': 2} {'type': 'loss', 'content': 0.11249499768018723, 'timestamp': '2025-09-10 02:47:34.981658', 'step': 10497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:35.038387', 'step': 10497, 'epoch': 2} {'type': 'loss', 'content': 0.18311290442943573, 'timestamp': '2025-09-10 02:47:35.041933', 'step': 10498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:35.098181', 'step': 10498, 'epoch': 2} {'type': 'loss', 'content': 0.08878447860479355, 'timestamp': '2025-09-10 02:47:35.100773', 'step': 10499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:47:35.157559', 'step': 10499, 'epoch': 2} {'type': 'loss', 'content': 0.14623089134693146, 'timestamp': '2025-09-10 02:47:35.166879', 'step': 10500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 10500', 'timestamp': '2025-09-10 02:47:35.620919', 'step': 10500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:35.680463', 'step': 10500, 'epoch': 2} {'type': 'loss', 'content': 0.09071096032857895, 'timestamp': '2025-09-10 02:47:35.683785', 'step': 10501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:35.745199', 'step': 10501, 'epoch': 2} {'type': 'loss', 'content': 0.11573828756809235, 'timestamp': '2025-09-10 02:47:35.747551', 'step': 10502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:35.803066', 'step': 10502, 'epoch': 2} {'type': 'loss', 'content': 0.16256603598594666, 'timestamp': '2025-09-10 02:47:35.808608', 'step': 10503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:35.867520', 'step': 10503, 'epoch': 2} {'type': 'loss', 'content': 0.1652117222547531, 'timestamp': '2025-09-10 02:47:35.873870', 'step': 10504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:35.928434', 'step': 10504, 'epoch': 2} {'type': 'loss', 'content': 0.2498210072517395, 'timestamp': '2025-09-10 02:47:35.930685', 'step': 10505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:35.986276', 'step': 10505, 'epoch': 2} {'type': 'loss', 'content': 0.07442465424537659, 'timestamp': '2025-09-10 02:47:35.988666', 'step': 10506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:36.046225', 'step': 10506, 'epoch': 2} {'type': 'loss', 'content': 0.19224698841571808, 'timestamp': '2025-09-10 02:47:36.048875', 'step': 10507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:47:36.105532', 'step': 10507, 'epoch': 2} {'type': 'loss', 'content': 0.09421615302562714, 'timestamp': '2025-09-10 02:47:36.112525', 'step': 10508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:36.168242', 'step': 10508, 'epoch': 2} {'type': 'loss', 'content': 0.10444623976945877, 'timestamp': '2025-09-10 02:47:36.170464', 'step': 10509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:36.225873', 'step': 10509, 'epoch': 2} {'type': 'loss', 'content': 0.06382209807634354, 'timestamp': '2025-09-10 02:47:36.227956', 'step': 10510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:36.281951', 'step': 10510, 'epoch': 2} {'type': 'loss', 'content': 0.16210250556468964, 'timestamp': '2025-09-10 02:47:36.283962', 'step': 10511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:36.337887', 'step': 10511, 'epoch': 2} {'type': 'loss', 'content': 0.12587402760982513, 'timestamp': '2025-09-10 02:47:36.344152', 'step': 10512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:36.397371', 'step': 10512, 'epoch': 2} {'type': 'loss', 'content': 0.15008080005645752, 'timestamp': '2025-09-10 02:47:36.399462', 'step': 10513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:36.452682', 'step': 10513, 'epoch': 2} {'type': 'loss', 'content': 0.14293569326400757, 'timestamp': '2025-09-10 02:47:36.455003', 'step': 10514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:36.509114', 'step': 10514, 'epoch': 2} {'type': 'loss', 'content': 0.08019313961267471, 'timestamp': '2025-09-10 02:47:36.511207', 'step': 10515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:36.566701', 'step': 10515, 'epoch': 2} {'type': 'loss', 'content': 0.17215225100517273, 'timestamp': '2025-09-10 02:47:36.573146', 'step': 10516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:36.626914', 'step': 10516, 'epoch': 2} {'type': 'loss', 'content': 0.069805808365345, 'timestamp': '2025-09-10 02:47:36.629488', 'step': 10517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:36.683832', 'step': 10517, 'epoch': 2} {'type': 'loss', 'content': 0.19433359801769257, 'timestamp': '2025-09-10 02:47:36.686200', 'step': 10518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:36.741351', 'step': 10518, 'epoch': 2} {'type': 'loss', 'content': 0.1261354684829712, 'timestamp': '2025-09-10 02:47:36.743675', 'step': 10519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:36.798393', 'step': 10519, 'epoch': 2} {'type': 'loss', 'content': 0.21170714497566223, 'timestamp': '2025-09-10 02:47:36.804841', 'step': 10520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:36.859434', 'step': 10520, 'epoch': 2} {'type': 'loss', 'content': 0.03165853023529053, 'timestamp': '2025-09-10 02:47:36.861442', 'step': 10521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:36.916313', 'step': 10521, 'epoch': 2} {'type': 'loss', 'content': 0.07768987119197845, 'timestamp': '2025-09-10 02:47:36.918311', 'step': 10522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:36.973170', 'step': 10522, 'epoch': 2} {'type': 'loss', 'content': 0.24799254536628723, 'timestamp': '2025-09-10 02:47:36.975298', 'step': 10523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:37.029191', 'step': 10523, 'epoch': 2} {'type': 'loss', 'content': 0.11706850677728653, 'timestamp': '2025-09-10 02:47:37.035668', 'step': 10524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:37.090026', 'step': 10524, 'epoch': 2} {'type': 'loss', 'content': 0.1181013286113739, 'timestamp': '2025-09-10 02:47:37.092046', 'step': 10525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:37.147055', 'step': 10525, 'epoch': 2} {'type': 'loss', 'content': 0.06627094745635986, 'timestamp': '2025-09-10 02:47:37.149026', 'step': 10526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:37.202990', 'step': 10526, 'epoch': 2} {'type': 'loss', 'content': 0.11378978192806244, 'timestamp': '2025-09-10 02:47:37.205236', 'step': 10527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:37.260183', 'step': 10527, 'epoch': 2} {'type': 'loss', 'content': 0.09792515635490417, 'timestamp': '2025-09-10 02:47:37.266536', 'step': 10528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:37.320355', 'step': 10528, 'epoch': 2} {'type': 'loss', 'content': 0.23292778432369232, 'timestamp': '2025-09-10 02:47:37.322529', 'step': 10529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:37.379410', 'step': 10529, 'epoch': 2} {'type': 'loss', 'content': 0.16525520384311676, 'timestamp': '2025-09-10 02:47:37.381647', 'step': 10530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:37.438332', 'step': 10530, 'epoch': 2} {'type': 'loss', 'content': 0.15793612599372864, 'timestamp': '2025-09-10 02:47:37.440592', 'step': 10531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:37.495130', 'step': 10531, 'epoch': 2} {'type': 'loss', 'content': 0.15664522349834442, 'timestamp': '2025-09-10 02:47:37.501670', 'step': 10532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:37.555741', 'step': 10532, 'epoch': 2} {'type': 'loss', 'content': 0.17500251531600952, 'timestamp': '2025-09-10 02:47:37.558190', 'step': 10533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:37.615452', 'step': 10533, 'epoch': 2} {'type': 'loss', 'content': 0.10344734787940979, 'timestamp': '2025-09-10 02:47:37.617714', 'step': 10534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:37.673707', 'step': 10534, 'epoch': 2} {'type': 'loss', 'content': 0.09515856951475143, 'timestamp': '2025-09-10 02:47:37.675962', 'step': 10535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:37.731301', 'step': 10535, 'epoch': 2} {'type': 'loss', 'content': 0.049669671803712845, 'timestamp': '2025-09-10 02:47:37.737566', 'step': 10536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:37.791357', 'step': 10536, 'epoch': 2} {'type': 'loss', 'content': 0.1443883776664734, 'timestamp': '2025-09-10 02:47:37.793419', 'step': 10537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:37.848116', 'step': 10537, 'epoch': 2} {'type': 'loss', 'content': 0.11524713039398193, 'timestamp': '2025-09-10 02:47:37.850406', 'step': 10538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:37.904654', 'step': 10538, 'epoch': 2} {'type': 'loss', 'content': 0.10191613435745239, 'timestamp': '2025-09-10 02:47:37.906912', 'step': 10539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:37.960675', 'step': 10539, 'epoch': 2} {'type': 'loss', 'content': 0.24176248908042908, 'timestamp': '2025-09-10 02:47:37.966772', 'step': 10540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:38.024031', 'step': 10540, 'epoch': 2} {'type': 'loss', 'content': 0.1148550882935524, 'timestamp': '2025-09-10 02:47:38.026172', 'step': 10541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:38.080532', 'step': 10541, 'epoch': 2} {'type': 'loss', 'content': 0.12949982285499573, 'timestamp': '2025-09-10 02:47:38.082697', 'step': 10542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:38.136786', 'step': 10542, 'epoch': 2} {'type': 'loss', 'content': 0.16152146458625793, 'timestamp': '2025-09-10 02:47:38.138751', 'step': 10543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:38.192898', 'step': 10543, 'epoch': 2} {'type': 'loss', 'content': 0.11011359095573425, 'timestamp': '2025-09-10 02:47:38.199122', 'step': 10544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:38.252703', 'step': 10544, 'epoch': 2} {'type': 'loss', 'content': 0.04576810449361801, 'timestamp': '2025-09-10 02:47:38.254810', 'step': 10545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:38.308936', 'step': 10545, 'epoch': 2} {'type': 'loss', 'content': 0.1440444439649582, 'timestamp': '2025-09-10 02:47:38.311155', 'step': 10546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:38.368116', 'step': 10546, 'epoch': 2} {'type': 'loss', 'content': 0.10747501254081726, 'timestamp': '2025-09-10 02:47:38.370451', 'step': 10547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:38.423843', 'step': 10547, 'epoch': 2} {'type': 'loss', 'content': 0.1736403852701187, 'timestamp': '2025-09-10 02:47:38.429674', 'step': 10548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:38.482990', 'step': 10548, 'epoch': 2} {'type': 'loss', 'content': 0.20982827246189117, 'timestamp': '2025-09-10 02:47:38.485116', 'step': 10549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:38.537954', 'step': 10549, 'epoch': 2} {'type': 'loss', 'content': 0.08058473467826843, 'timestamp': '2025-09-10 02:47:38.539984', 'step': 10550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:38.593334', 'step': 10550, 'epoch': 2} {'type': 'loss', 'content': 0.19334499537944794, 'timestamp': '2025-09-10 02:47:38.595348', 'step': 10551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:38.648368', 'step': 10551, 'epoch': 2} {'type': 'loss', 'content': 0.1293778419494629, 'timestamp': '2025-09-10 02:47:38.654480', 'step': 10552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:38.708316', 'step': 10552, 'epoch': 2} {'type': 'loss', 'content': 0.18380232155323029, 'timestamp': '2025-09-10 02:47:38.710504', 'step': 10553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:38.764035', 'step': 10553, 'epoch': 2} {'type': 'loss', 'content': 0.08902379870414734, 'timestamp': '2025-09-10 02:47:38.765923', 'step': 10554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:38.820316', 'step': 10554, 'epoch': 2} {'type': 'loss', 'content': 0.1203642189502716, 'timestamp': '2025-09-10 02:47:38.822356', 'step': 10555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:38.875755', 'step': 10555, 'epoch': 2} {'type': 'loss', 'content': 0.12150879949331284, 'timestamp': '2025-09-10 02:47:38.881900', 'step': 10556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:38.935935', 'step': 10556, 'epoch': 2} {'type': 'loss', 'content': 0.07919593900442123, 'timestamp': '2025-09-10 02:47:38.938313', 'step': 10557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:38.991514', 'step': 10557, 'epoch': 2} {'type': 'loss', 'content': 0.14742903411388397, 'timestamp': '2025-09-10 02:47:38.993774', 'step': 10558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:39.048128', 'step': 10558, 'epoch': 2} {'type': 'loss', 'content': 0.08271346241235733, 'timestamp': '2025-09-10 02:47:39.050330', 'step': 10559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:39.104336', 'step': 10559, 'epoch': 2} {'type': 'loss', 'content': 0.13556945323944092, 'timestamp': '2025-09-10 02:47:39.110621', 'step': 10560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:39.165434', 'step': 10560, 'epoch': 2} {'type': 'loss', 'content': 0.1856672614812851, 'timestamp': '2025-09-10 02:47:39.170066', 'step': 10561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:39.233522', 'step': 10561, 'epoch': 2} {'type': 'loss', 'content': 0.09961242228746414, 'timestamp': '2025-09-10 02:47:39.237332', 'step': 10562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:39.293009', 'step': 10562, 'epoch': 2} {'type': 'loss', 'content': 0.20962876081466675, 'timestamp': '2025-09-10 02:47:39.295204', 'step': 10563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:39.350345', 'step': 10563, 'epoch': 2} {'type': 'loss', 'content': 0.08077429234981537, 'timestamp': '2025-09-10 02:47:39.356491', 'step': 10564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:39.410771', 'step': 10564, 'epoch': 2} {'type': 'loss', 'content': 0.15754146873950958, 'timestamp': '2025-09-10 02:47:39.416851', 'step': 10565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:39.472442', 'step': 10565, 'epoch': 2} {'type': 'loss', 'content': 0.1504075527191162, 'timestamp': '2025-09-10 02:47:39.474917', 'step': 10566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:39.528701', 'step': 10566, 'epoch': 2} {'type': 'loss', 'content': 0.09282898157835007, 'timestamp': '2025-09-10 02:47:39.530852', 'step': 10567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:39.585255', 'step': 10567, 'epoch': 2} {'type': 'loss', 'content': 0.095396488904953, 'timestamp': '2025-09-10 02:47:39.596186', 'step': 10568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:39.658451', 'step': 10568, 'epoch': 2} {'type': 'loss', 'content': 0.08357546478509903, 'timestamp': '2025-09-10 02:47:39.660445', 'step': 10569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:39.715198', 'step': 10569, 'epoch': 2} {'type': 'loss', 'content': 0.11801935732364655, 'timestamp': '2025-09-10 02:47:39.717954', 'step': 10570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:39.785913', 'step': 10570, 'epoch': 2} {'type': 'loss', 'content': 0.0980016440153122, 'timestamp': '2025-09-10 02:47:39.788207', 'step': 10571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:39.843341', 'step': 10571, 'epoch': 2} {'type': 'loss', 'content': 0.17068830132484436, 'timestamp': '2025-09-10 02:47:39.849784', 'step': 10572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:39.907003', 'step': 10572, 'epoch': 2} {'type': 'loss', 'content': 0.11411045491695404, 'timestamp': '2025-09-10 02:47:39.914300', 'step': 10573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:39.972380', 'step': 10573, 'epoch': 2} {'type': 'loss', 'content': 0.1500149816274643, 'timestamp': '2025-09-10 02:47:39.974631', 'step': 10574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:40.032717', 'step': 10574, 'epoch': 2} {'type': 'loss', 'content': 0.1347184032201767, 'timestamp': '2025-09-10 02:47:40.034845', 'step': 10575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:40.089632', 'step': 10575, 'epoch': 2} {'type': 'loss', 'content': 0.21633075177669525, 'timestamp': '2025-09-10 02:47:40.098704', 'step': 10576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:40.152197', 'step': 10576, 'epoch': 2} {'type': 'loss', 'content': 0.16388435661792755, 'timestamp': '2025-09-10 02:47:40.154277', 'step': 10577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:40.208396', 'step': 10577, 'epoch': 2} {'type': 'loss', 'content': 0.06735358387231827, 'timestamp': '2025-09-10 02:47:40.210501', 'step': 10578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:40.265056', 'step': 10578, 'epoch': 2} {'type': 'loss', 'content': 0.12419871240854263, 'timestamp': '2025-09-10 02:47:40.267113', 'step': 10579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:40.323794', 'step': 10579, 'epoch': 2} {'type': 'loss', 'content': 0.047663863748311996, 'timestamp': '2025-09-10 02:47:40.330158', 'step': 10580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:40.385284', 'step': 10580, 'epoch': 2} {'type': 'loss', 'content': 0.09984171390533447, 'timestamp': '2025-09-10 02:47:40.387309', 'step': 10581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:40.441638', 'step': 10581, 'epoch': 2} {'type': 'loss', 'content': 0.13937672972679138, 'timestamp': '2025-09-10 02:47:40.443603', 'step': 10582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:40.498094', 'step': 10582, 'epoch': 2} {'type': 'loss', 'content': 0.10584250837564468, 'timestamp': '2025-09-10 02:47:40.500061', 'step': 10583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:40.555431', 'step': 10583, 'epoch': 2} {'type': 'loss', 'content': 0.07687931507825851, 'timestamp': '2025-09-10 02:47:40.561664', 'step': 10584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:40.616302', 'step': 10584, 'epoch': 2} {'type': 'loss', 'content': 0.11558555066585541, 'timestamp': '2025-09-10 02:47:40.618357', 'step': 10585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:40.673970', 'step': 10585, 'epoch': 2} {'type': 'loss', 'content': 0.14336691796779633, 'timestamp': '2025-09-10 02:47:40.675951', 'step': 10586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:40.730590', 'step': 10586, 'epoch': 2} {'type': 'loss', 'content': 0.08854330331087112, 'timestamp': '2025-09-10 02:47:40.733024', 'step': 10587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:40.787617', 'step': 10587, 'epoch': 2} {'type': 'loss', 'content': 0.08334477990865707, 'timestamp': '2025-09-10 02:47:40.793445', 'step': 10588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:40.847432', 'step': 10588, 'epoch': 2} {'type': 'loss', 'content': 0.09153340756893158, 'timestamp': '2025-09-10 02:47:40.849661', 'step': 10589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:40.905853', 'step': 10589, 'epoch': 2} {'type': 'loss', 'content': 0.18467313051223755, 'timestamp': '2025-09-10 02:47:40.908080', 'step': 10590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:40.964655', 'step': 10590, 'epoch': 2} {'type': 'loss', 'content': 0.11305460333824158, 'timestamp': '2025-09-10 02:47:40.966713', 'step': 10591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:41.021554', 'step': 10591, 'epoch': 2} {'type': 'loss', 'content': 0.1308714598417282, 'timestamp': '2025-09-10 02:47:41.028236', 'step': 10592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:41.084691', 'step': 10592, 'epoch': 2} {'type': 'loss', 'content': 0.1860390156507492, 'timestamp': '2025-09-10 02:47:41.086857', 'step': 10593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:41.141109', 'step': 10593, 'epoch': 2} {'type': 'loss', 'content': 0.08873093128204346, 'timestamp': '2025-09-10 02:47:41.143376', 'step': 10594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:41.197693', 'step': 10594, 'epoch': 2} {'type': 'loss', 'content': 0.13526299595832825, 'timestamp': '2025-09-10 02:47:41.199821', 'step': 10595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:41.254332', 'step': 10595, 'epoch': 2} {'type': 'loss', 'content': 0.1353173404932022, 'timestamp': '2025-09-10 02:47:41.260681', 'step': 10596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:41.314808', 'step': 10596, 'epoch': 2} {'type': 'loss', 'content': 0.10097260028123856, 'timestamp': '2025-09-10 02:47:41.316961', 'step': 10597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:41.373354', 'step': 10597, 'epoch': 2} {'type': 'loss', 'content': 0.06707833707332611, 'timestamp': '2025-09-10 02:47:41.375505', 'step': 10598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:41.431888', 'step': 10598, 'epoch': 2} {'type': 'loss', 'content': 0.09017740935087204, 'timestamp': '2025-09-10 02:47:41.434021', 'step': 10599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:41.492414', 'step': 10599, 'epoch': 2} {'type': 'loss', 'content': 0.18851740658283234, 'timestamp': '2025-09-10 02:47:41.498961', 'step': 10600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:41.560076', 'step': 10600, 'epoch': 2} {'type': 'loss', 'content': 0.18361669778823853, 'timestamp': '2025-09-10 02:47:41.562197', 'step': 10601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:41.621794', 'step': 10601, 'epoch': 2} {'type': 'loss', 'content': 0.08478125184774399, 'timestamp': '2025-09-10 02:47:41.624031', 'step': 10602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:41.680858', 'step': 10602, 'epoch': 2} {'type': 'loss', 'content': 0.2811919152736664, 'timestamp': '2025-09-10 02:47:41.683076', 'step': 10603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:41.739924', 'step': 10603, 'epoch': 2} {'type': 'loss', 'content': 0.09941937774419785, 'timestamp': '2025-09-10 02:47:41.746511', 'step': 10604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:41.802167', 'step': 10604, 'epoch': 2} {'type': 'loss', 'content': 0.1343728005886078, 'timestamp': '2025-09-10 02:47:41.804317', 'step': 10605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:41.861825', 'step': 10605, 'epoch': 2} {'type': 'loss', 'content': 0.18416553735733032, 'timestamp': '2025-09-10 02:47:41.863794', 'step': 10606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:41.923144', 'step': 10606, 'epoch': 2} {'type': 'loss', 'content': 0.14265099167823792, 'timestamp': '2025-09-10 02:47:41.925129', 'step': 10607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:41.987908', 'step': 10607, 'epoch': 2} {'type': 'loss', 'content': 0.16893120110034943, 'timestamp': '2025-09-10 02:47:41.994512', 'step': 10608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:42.050891', 'step': 10608, 'epoch': 2} {'type': 'loss', 'content': 0.1511220633983612, 'timestamp': '2025-09-10 02:47:42.052989', 'step': 10609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:42.112173', 'step': 10609, 'epoch': 2} {'type': 'loss', 'content': 0.15865889191627502, 'timestamp': '2025-09-10 02:47:42.114133', 'step': 10610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:42.173175', 'step': 10610, 'epoch': 2} {'type': 'loss', 'content': 0.2233964055776596, 'timestamp': '2025-09-10 02:47:42.175317', 'step': 10611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:42.231980', 'step': 10611, 'epoch': 2} {'type': 'loss', 'content': 0.1171046644449234, 'timestamp': '2025-09-10 02:47:42.238386', 'step': 10612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:42.295215', 'step': 10612, 'epoch': 2} {'type': 'loss', 'content': 0.10315939784049988, 'timestamp': '2025-09-10 02:47:42.297306', 'step': 10613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:42.356209', 'step': 10613, 'epoch': 2} {'type': 'loss', 'content': 0.12240635603666306, 'timestamp': '2025-09-10 02:47:42.358205', 'step': 10614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:42.416078', 'step': 10614, 'epoch': 2} {'type': 'loss', 'content': 0.15329425036907196, 'timestamp': '2025-09-10 02:47:42.418241', 'step': 10615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:42.476762', 'step': 10615, 'epoch': 2} {'type': 'loss', 'content': 0.14212535321712494, 'timestamp': '2025-09-10 02:47:42.483473', 'step': 10616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:42.541345', 'step': 10616, 'epoch': 2} {'type': 'loss', 'content': 0.1813092976808548, 'timestamp': '2025-09-10 02:47:42.543508', 'step': 10617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:42.601419', 'step': 10617, 'epoch': 2} {'type': 'loss', 'content': 0.12956443428993225, 'timestamp': '2025-09-10 02:47:42.603518', 'step': 10618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:42.661946', 'step': 10618, 'epoch': 2} {'type': 'loss', 'content': 0.08773919194936752, 'timestamp': '2025-09-10 02:47:42.664198', 'step': 10619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:42.722245', 'step': 10619, 'epoch': 2} {'type': 'loss', 'content': 0.13577713072299957, 'timestamp': '2025-09-10 02:47:42.729083', 'step': 10620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:42.785144', 'step': 10620, 'epoch': 2} {'type': 'loss', 'content': 0.16194455325603485, 'timestamp': '2025-09-10 02:47:42.787221', 'step': 10621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:42.846495', 'step': 10621, 'epoch': 2} {'type': 'loss', 'content': 0.10676520317792892, 'timestamp': '2025-09-10 02:47:42.848607', 'step': 10622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:42.905704', 'step': 10622, 'epoch': 2} {'type': 'loss', 'content': 0.15766707062721252, 'timestamp': '2025-09-10 02:47:42.907686', 'step': 10623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:42.965470', 'step': 10623, 'epoch': 2} {'type': 'loss', 'content': 0.1817055493593216, 'timestamp': '2025-09-10 02:47:42.972308', 'step': 10624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:43.029638', 'step': 10624, 'epoch': 2} {'type': 'loss', 'content': 0.24727365374565125, 'timestamp': '2025-09-10 02:47:43.031632', 'step': 10625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:43.089549', 'step': 10625, 'epoch': 2} {'type': 'loss', 'content': 0.15523940324783325, 'timestamp': '2025-09-10 02:47:43.091636', 'step': 10626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:43.152827', 'step': 10626, 'epoch': 2} {'type': 'loss', 'content': 0.2022097110748291, 'timestamp': '2025-09-10 02:47:43.154852', 'step': 10627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:43.214246', 'step': 10627, 'epoch': 2} {'type': 'loss', 'content': 0.19365575909614563, 'timestamp': '2025-09-10 02:47:43.220823', 'step': 10628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:43.278782', 'step': 10628, 'epoch': 2} {'type': 'loss', 'content': 0.0732729583978653, 'timestamp': '2025-09-10 02:47:43.280968', 'step': 10629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:43.338159', 'step': 10629, 'epoch': 2} {'type': 'loss', 'content': 0.09952715039253235, 'timestamp': '2025-09-10 02:47:43.340371', 'step': 10630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:43.399401', 'step': 10630, 'epoch': 2} {'type': 'loss', 'content': 0.10702851414680481, 'timestamp': '2025-09-10 02:47:43.401512', 'step': 10631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:43.460657', 'step': 10631, 'epoch': 2} {'type': 'loss', 'content': 0.17539146542549133, 'timestamp': '2025-09-10 02:47:43.467385', 'step': 10632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:43.527473', 'step': 10632, 'epoch': 2} {'type': 'loss', 'content': 0.14403903484344482, 'timestamp': '2025-09-10 02:47:43.529746', 'step': 10633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:43.589622', 'step': 10633, 'epoch': 2} {'type': 'loss', 'content': 0.13906505703926086, 'timestamp': '2025-09-10 02:47:43.591936', 'step': 10634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:43.649524', 'step': 10634, 'epoch': 2} {'type': 'loss', 'content': 0.13883821666240692, 'timestamp': '2025-09-10 02:47:43.651525', 'step': 10635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:43.710936', 'step': 10635, 'epoch': 2} {'type': 'loss', 'content': 0.1668424904346466, 'timestamp': '2025-09-10 02:47:43.717851', 'step': 10636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:43.777246', 'step': 10636, 'epoch': 2} {'type': 'loss', 'content': 0.14420390129089355, 'timestamp': '2025-09-10 02:47:43.779287', 'step': 10637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:43.836115', 'step': 10637, 'epoch': 2} {'type': 'loss', 'content': 0.17252199351787567, 'timestamp': '2025-09-10 02:47:43.838435', 'step': 10638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:43.898231', 'step': 10638, 'epoch': 2} {'type': 'loss', 'content': 0.1585867702960968, 'timestamp': '2025-09-10 02:47:43.900241', 'step': 10639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:43.957145', 'step': 10639, 'epoch': 2} {'type': 'loss', 'content': 0.1002076044678688, 'timestamp': '2025-09-10 02:47:43.963785', 'step': 10640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:44.020874', 'step': 10640, 'epoch': 2} {'type': 'loss', 'content': 0.11134111136198044, 'timestamp': '2025-09-10 02:47:44.023205', 'step': 10641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:44.087970', 'step': 10641, 'epoch': 2} {'type': 'loss', 'content': 0.0996025949716568, 'timestamp': '2025-09-10 02:47:44.090094', 'step': 10642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:44.147842', 'step': 10642, 'epoch': 2} {'type': 'loss', 'content': 0.08526831865310669, 'timestamp': '2025-09-10 02:47:44.149845', 'step': 10643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:44.207635', 'step': 10643, 'epoch': 2} {'type': 'loss', 'content': 0.12428158521652222, 'timestamp': '2025-09-10 02:47:44.214428', 'step': 10644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:44.272854', 'step': 10644, 'epoch': 2} {'type': 'loss', 'content': 0.23151668906211853, 'timestamp': '2025-09-10 02:47:44.274991', 'step': 10645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:44.333959', 'step': 10645, 'epoch': 2} {'type': 'loss', 'content': 0.11961288750171661, 'timestamp': '2025-09-10 02:47:44.336001', 'step': 10646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:44.396609', 'step': 10646, 'epoch': 2} {'type': 'loss', 'content': 0.12948815524578094, 'timestamp': '2025-09-10 02:47:44.398752', 'step': 10647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:44.456585', 'step': 10647, 'epoch': 2} {'type': 'loss', 'content': 0.1764206737279892, 'timestamp': '2025-09-10 02:47:44.463260', 'step': 10648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:44.520968', 'step': 10648, 'epoch': 2} {'type': 'loss', 'content': 0.1178690567612648, 'timestamp': '2025-09-10 02:47:44.523076', 'step': 10649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:44.583859', 'step': 10649, 'epoch': 2} {'type': 'loss', 'content': 0.09632839262485504, 'timestamp': '2025-09-10 02:47:44.585975', 'step': 10650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:44.644921', 'step': 10650, 'epoch': 2} {'type': 'loss', 'content': 0.08571871370077133, 'timestamp': '2025-09-10 02:47:44.646954', 'step': 10651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:44.704532', 'step': 10651, 'epoch': 2} {'type': 'loss', 'content': 0.09104397892951965, 'timestamp': '2025-09-10 02:47:44.711375', 'step': 10652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:44.768875', 'step': 10652, 'epoch': 2} {'type': 'loss', 'content': 0.09573227167129517, 'timestamp': '2025-09-10 02:47:44.771077', 'step': 10653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:47:44.827886', 'step': 10653, 'epoch': 2} {'type': 'loss', 'content': 0.14778722822666168, 'timestamp': '2025-09-10 02:47:44.829905', 'step': 10654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:44.890092', 'step': 10654, 'epoch': 2} {'type': 'loss', 'content': 0.0717451348900795, 'timestamp': '2025-09-10 02:47:44.892136', 'step': 10655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:44.950303', 'step': 10655, 'epoch': 2} {'type': 'loss', 'content': 0.08556295186281204, 'timestamp': '2025-09-10 02:47:44.957132', 'step': 10656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:45.012892', 'step': 10656, 'epoch': 2} {'type': 'loss', 'content': 0.05045642703771591, 'timestamp': '2025-09-10 02:47:45.014854', 'step': 10657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:47:45.075941', 'step': 10657, 'epoch': 2} {'type': 'loss', 'content': 0.07104678452014923, 'timestamp': '2025-09-10 02:47:45.078064', 'step': 10658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:45.136326', 'step': 10658, 'epoch': 2} {'type': 'loss', 'content': 0.15756963193416595, 'timestamp': '2025-09-10 02:47:45.138345', 'step': 10659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:45.196499', 'step': 10659, 'epoch': 2} {'type': 'loss', 'content': 0.07770398259162903, 'timestamp': '2025-09-10 02:47:45.203033', 'step': 10660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:45.260375', 'step': 10660, 'epoch': 2} {'type': 'loss', 'content': 0.1406479924917221, 'timestamp': '2025-09-10 02:47:45.262575', 'step': 10661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:45.319973', 'step': 10661, 'epoch': 2} {'type': 'loss', 'content': 0.18411897122859955, 'timestamp': '2025-09-10 02:47:45.322248', 'step': 10662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:45.379851', 'step': 10662, 'epoch': 2} {'type': 'loss', 'content': 0.18107271194458008, 'timestamp': '2025-09-10 02:47:45.381902', 'step': 10663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:45.440733', 'step': 10663, 'epoch': 2} {'type': 'loss', 'content': 0.17297740280628204, 'timestamp': '2025-09-10 02:47:45.447286', 'step': 10664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:45.504197', 'step': 10664, 'epoch': 2} {'type': 'loss', 'content': 0.11359681189060211, 'timestamp': '2025-09-10 02:47:45.506258', 'step': 10665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:45.563624', 'step': 10665, 'epoch': 2} {'type': 'loss', 'content': 0.10879870504140854, 'timestamp': '2025-09-10 02:47:45.565743', 'step': 10666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:45.624593', 'step': 10666, 'epoch': 2} {'type': 'loss', 'content': 0.11672157794237137, 'timestamp': '2025-09-10 02:47:45.626674', 'step': 10667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:45.685096', 'step': 10667, 'epoch': 2} {'type': 'loss', 'content': 0.10800236463546753, 'timestamp': '2025-09-10 02:47:45.691840', 'step': 10668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:45.750417', 'step': 10668, 'epoch': 2} {'type': 'loss', 'content': 0.07225091755390167, 'timestamp': '2025-09-10 02:47:45.752500', 'step': 10669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:45.810259', 'step': 10669, 'epoch': 2} {'type': 'loss', 'content': 0.16321156919002533, 'timestamp': '2025-09-10 02:47:45.812307', 'step': 10670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:45.870296', 'step': 10670, 'epoch': 2} {'type': 'loss', 'content': 0.13951171934604645, 'timestamp': '2025-09-10 02:47:45.873477', 'step': 10671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:45.934005', 'step': 10671, 'epoch': 2} {'type': 'loss', 'content': 0.10986211150884628, 'timestamp': '2025-09-10 02:47:45.942769', 'step': 10672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:46.002679', 'step': 10672, 'epoch': 2} {'type': 'loss', 'content': 0.11427432298660278, 'timestamp': '2025-09-10 02:47:46.004786', 'step': 10673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:46.061098', 'step': 10673, 'epoch': 2} {'type': 'loss', 'content': 0.12049650400876999, 'timestamp': '2025-09-10 02:47:46.064472', 'step': 10674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:46.122657', 'step': 10674, 'epoch': 2} {'type': 'loss', 'content': 0.15166722238063812, 'timestamp': '2025-09-10 02:47:46.124863', 'step': 10675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:46.182892', 'step': 10675, 'epoch': 2} {'type': 'loss', 'content': 0.09956540912389755, 'timestamp': '2025-09-10 02:47:46.189836', 'step': 10676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:46.248820', 'step': 10676, 'epoch': 2} {'type': 'loss', 'content': 0.18888826668262482, 'timestamp': '2025-09-10 02:47:46.251078', 'step': 10677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:46.310124', 'step': 10677, 'epoch': 2} {'type': 'loss', 'content': 0.11363229155540466, 'timestamp': '2025-09-10 02:47:46.312250', 'step': 10678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:46.377263', 'step': 10678, 'epoch': 2} {'type': 'loss', 'content': 0.12103937566280365, 'timestamp': '2025-09-10 02:47:46.379311', 'step': 10679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:46.435457', 'step': 10679, 'epoch': 2} {'type': 'loss', 'content': 0.11072096973657608, 'timestamp': '2025-09-10 02:47:46.441781', 'step': 10680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:46.497752', 'step': 10680, 'epoch': 2} {'type': 'loss', 'content': 0.1738237589597702, 'timestamp': '2025-09-10 02:47:46.499833', 'step': 10681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:46.557472', 'step': 10681, 'epoch': 2} {'type': 'loss', 'content': 0.13540202379226685, 'timestamp': '2025-09-10 02:47:46.559500', 'step': 10682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:46.619128', 'step': 10682, 'epoch': 2} {'type': 'loss', 'content': 0.1954653114080429, 'timestamp': '2025-09-10 02:47:46.624383', 'step': 10683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:46.689129', 'step': 10683, 'epoch': 2} {'type': 'loss', 'content': 0.12990596890449524, 'timestamp': '2025-09-10 02:47:46.695685', 'step': 10684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:46.753526', 'step': 10684, 'epoch': 2} {'type': 'loss', 'content': 0.11333730071783066, 'timestamp': '2025-09-10 02:47:46.755582', 'step': 10685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:46.825450', 'step': 10685, 'epoch': 2} {'type': 'loss', 'content': 0.08994289487600327, 'timestamp': '2025-09-10 02:47:46.827646', 'step': 10686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:46.887063', 'step': 10686, 'epoch': 2} {'type': 'loss', 'content': 0.18209712207317352, 'timestamp': '2025-09-10 02:47:46.889342', 'step': 10687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:46.947065', 'step': 10687, 'epoch': 2} {'type': 'loss', 'content': 0.2436666041612625, 'timestamp': '2025-09-10 02:47:46.953267', 'step': 10688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:47.008870', 'step': 10688, 'epoch': 2} {'type': 'loss', 'content': 0.07724342495203018, 'timestamp': '2025-09-10 02:47:47.011045', 'step': 10689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:47.068465', 'step': 10689, 'epoch': 2} {'type': 'loss', 'content': 0.12991608679294586, 'timestamp': '2025-09-10 02:47:47.070769', 'step': 10690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:47.129205', 'step': 10690, 'epoch': 2} {'type': 'loss', 'content': 0.10320943593978882, 'timestamp': '2025-09-10 02:47:47.131562', 'step': 10691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:47.188612', 'step': 10691, 'epoch': 2} {'type': 'loss', 'content': 0.13544520735740662, 'timestamp': '2025-09-10 02:47:47.195170', 'step': 10692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:47.252892', 'step': 10692, 'epoch': 2} {'type': 'loss', 'content': 0.06882910430431366, 'timestamp': '2025-09-10 02:47:47.255377', 'step': 10693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:47.313066', 'step': 10693, 'epoch': 2} {'type': 'loss', 'content': 0.1173972338438034, 'timestamp': '2025-09-10 02:47:47.315340', 'step': 10694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:47:47.371236', 'step': 10694, 'epoch': 2} {'type': 'loss', 'content': 0.22260579466819763, 'timestamp': '2025-09-10 02:47:47.373432', 'step': 10695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:47.428431', 'step': 10695, 'epoch': 2} {'type': 'loss', 'content': 0.11150958389043808, 'timestamp': '2025-09-10 02:47:47.435078', 'step': 10696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:47.490189', 'step': 10696, 'epoch': 2} {'type': 'loss', 'content': 0.14630810916423798, 'timestamp': '2025-09-10 02:47:47.492484', 'step': 10697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:47.547435', 'step': 10697, 'epoch': 2} {'type': 'loss', 'content': 0.1846017837524414, 'timestamp': '2025-09-10 02:47:47.549621', 'step': 10698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:47.604376', 'step': 10698, 'epoch': 2} {'type': 'loss', 'content': 0.09945423156023026, 'timestamp': '2025-09-10 02:47:47.606530', 'step': 10699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:47.663454', 'step': 10699, 'epoch': 2} {'type': 'loss', 'content': 0.11517088860273361, 'timestamp': '2025-09-10 02:47:47.670881', 'step': 10700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:47.725153', 'step': 10700, 'epoch': 2} {'type': 'loss', 'content': 0.11148470640182495, 'timestamp': '2025-09-10 02:47:47.727405', 'step': 10701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:47.782777', 'step': 10701, 'epoch': 2} {'type': 'loss', 'content': 0.21445755660533905, 'timestamp': '2025-09-10 02:47:47.784935', 'step': 10702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:47.841914', 'step': 10702, 'epoch': 2} {'type': 'loss', 'content': 0.09480330348014832, 'timestamp': '2025-09-10 02:47:47.844130', 'step': 10703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:47.899909', 'step': 10703, 'epoch': 2} {'type': 'loss', 'content': 0.17433464527130127, 'timestamp': '2025-09-10 02:47:47.906566', 'step': 10704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:47.961265', 'step': 10704, 'epoch': 2} {'type': 'loss', 'content': 0.0665772408246994, 'timestamp': '2025-09-10 02:47:47.963797', 'step': 10705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:48.019246', 'step': 10705, 'epoch': 2} {'type': 'loss', 'content': 0.19908739626407623, 'timestamp': '2025-09-10 02:47:48.021440', 'step': 10706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:48.076590', 'step': 10706, 'epoch': 2} {'type': 'loss', 'content': 0.04870350658893585, 'timestamp': '2025-09-10 02:47:48.078741', 'step': 10707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:48.134003', 'step': 10707, 'epoch': 2} {'type': 'loss', 'content': 0.217829629778862, 'timestamp': '2025-09-10 02:47:48.140640', 'step': 10708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:48.195892', 'step': 10708, 'epoch': 2} {'type': 'loss', 'content': 0.1934712827205658, 'timestamp': '2025-09-10 02:47:48.198201', 'step': 10709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:48.255790', 'step': 10709, 'epoch': 2} {'type': 'loss', 'content': 0.1294267475605011, 'timestamp': '2025-09-10 02:47:48.257964', 'step': 10710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:47:48.314644', 'step': 10710, 'epoch': 2} {'type': 'loss', 'content': 0.13457956910133362, 'timestamp': '2025-09-10 02:47:48.316662', 'step': 10711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:48.373753', 'step': 10711, 'epoch': 2} {'type': 'loss', 'content': 0.0878223180770874, 'timestamp': '2025-09-10 02:47:48.380269', 'step': 10712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:48.434297', 'step': 10712, 'epoch': 2} {'type': 'loss', 'content': 0.07343436032533646, 'timestamp': '2025-09-10 02:47:48.436502', 'step': 10713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:48.489515', 'step': 10713, 'epoch': 2} {'type': 'loss', 'content': 0.07722417265176773, 'timestamp': '2025-09-10 02:47:48.491523', 'step': 10714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:47:48.544625', 'step': 10714, 'epoch': 2} {'type': 'loss', 'content': 0.11254249513149261, 'timestamp': '2025-09-10 02:47:48.546626', 'step': 10715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:48.601366', 'step': 10715, 'epoch': 2} {'type': 'loss', 'content': 0.07953914999961853, 'timestamp': '2025-09-10 02:47:48.607476', 'step': 10716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:48.660104', 'step': 10716, 'epoch': 2} {'type': 'loss', 'content': 0.1286046952009201, 'timestamp': '2025-09-10 02:47:48.662340', 'step': 10717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:48.715355', 'step': 10717, 'epoch': 2} {'type': 'loss', 'content': 0.13206495344638824, 'timestamp': '2025-09-10 02:47:48.717640', 'step': 10718, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:48:01.395122', 'step': 10718, 'epoch': 2} {'type': 'pplx', 'content': 12957.20885120324, 'timestamp': '2025-09-10 02:48:01.398316', 'step': 10718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:01.453666', 'step': 10718, 'epoch': 2} {'type': 'loss', 'content': 0.20029984414577484, 'timestamp': '2025-09-10 02:48:01.455655', 'step': 10719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:01.510900', 'step': 10719, 'epoch': 2} {'type': 'loss', 'content': 0.12327542155981064, 'timestamp': '2025-09-10 02:48:01.517220', 'step': 10720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:01.570488', 'step': 10720, 'epoch': 2} {'type': 'loss', 'content': 0.11955519765615463, 'timestamp': '2025-09-10 02:48:01.572671', 'step': 10721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:01.626325', 'step': 10721, 'epoch': 2} {'type': 'loss', 'content': 0.07304270565509796, 'timestamp': '2025-09-10 02:48:01.628500', 'step': 10722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:01.682313', 'step': 10722, 'epoch': 2} {'type': 'loss', 'content': 0.128060445189476, 'timestamp': '2025-09-10 02:48:01.684598', 'step': 10723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:01.739650', 'step': 10723, 'epoch': 2} {'type': 'loss', 'content': 0.14073000848293304, 'timestamp': '2025-09-10 02:48:01.745754', 'step': 10724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:01.798556', 'step': 10724, 'epoch': 2} {'type': 'loss', 'content': 0.11762184649705887, 'timestamp': '2025-09-10 02:48:01.800526', 'step': 10725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:01.854093', 'step': 10725, 'epoch': 2} {'type': 'loss', 'content': 0.2489822953939438, 'timestamp': '2025-09-10 02:48:01.856297', 'step': 10726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:01.910124', 'step': 10726, 'epoch': 2} {'type': 'loss', 'content': 0.19094610214233398, 'timestamp': '2025-09-10 02:48:01.912458', 'step': 10727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:01.966257', 'step': 10727, 'epoch': 2} {'type': 'loss', 'content': 0.10340171307325363, 'timestamp': '2025-09-10 02:48:01.972505', 'step': 10728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:02.025755', 'step': 10728, 'epoch': 2} {'type': 'loss', 'content': 0.15863710641860962, 'timestamp': '2025-09-10 02:48:02.027913', 'step': 10729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:02.081353', 'step': 10729, 'epoch': 2} {'type': 'loss', 'content': 0.11387743055820465, 'timestamp': '2025-09-10 02:48:02.083473', 'step': 10730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:02.136912', 'step': 10730, 'epoch': 2} {'type': 'loss', 'content': 0.11995228379964828, 'timestamp': '2025-09-10 02:48:02.139107', 'step': 10731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:02.192129', 'step': 10731, 'epoch': 2} {'type': 'loss', 'content': 0.04404936358332634, 'timestamp': '2025-09-10 02:48:02.198228', 'step': 10732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:02.250521', 'step': 10732, 'epoch': 2} {'type': 'loss', 'content': 0.15430974960327148, 'timestamp': '2025-09-10 02:48:02.252629', 'step': 10733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:02.306012', 'step': 10733, 'epoch': 2} {'type': 'loss', 'content': 0.13232950866222382, 'timestamp': '2025-09-10 02:48:02.308078', 'step': 10734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:02.361300', 'step': 10734, 'epoch': 2} {'type': 'loss', 'content': 0.05529484525322914, 'timestamp': '2025-09-10 02:48:02.363530', 'step': 10735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:02.417192', 'step': 10735, 'epoch': 2} {'type': 'loss', 'content': 0.13717558979988098, 'timestamp': '2025-09-10 02:48:02.423176', 'step': 10736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:02.476955', 'step': 10736, 'epoch': 2} {'type': 'loss', 'content': 0.12167124450206757, 'timestamp': '2025-09-10 02:48:02.479165', 'step': 10737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:02.533093', 'step': 10737, 'epoch': 2} {'type': 'loss', 'content': 0.081728495657444, 'timestamp': '2025-09-10 02:48:02.535307', 'step': 10738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:02.589807', 'step': 10738, 'epoch': 2} {'type': 'loss', 'content': 0.18244484066963196, 'timestamp': '2025-09-10 02:48:02.592018', 'step': 10739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:02.646333', 'step': 10739, 'epoch': 2} {'type': 'loss', 'content': 0.10488076508045197, 'timestamp': '2025-09-10 02:48:02.652539', 'step': 10740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:02.705818', 'step': 10740, 'epoch': 2} {'type': 'loss', 'content': 0.09595651179552078, 'timestamp': '2025-09-10 02:48:02.708076', 'step': 10741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:02.761226', 'step': 10741, 'epoch': 2} {'type': 'loss', 'content': 0.04837760329246521, 'timestamp': '2025-09-10 02:48:02.763361', 'step': 10742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:02.816884', 'step': 10742, 'epoch': 2} {'type': 'loss', 'content': 0.14720873534679413, 'timestamp': '2025-09-10 02:48:02.819109', 'step': 10743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:02.874359', 'step': 10743, 'epoch': 2} {'type': 'loss', 'content': 0.1475003957748413, 'timestamp': '2025-09-10 02:48:02.880385', 'step': 10744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:02.933873', 'step': 10744, 'epoch': 2} {'type': 'loss', 'content': 0.1983688771724701, 'timestamp': '2025-09-10 02:48:02.936035', 'step': 10745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:02.991439', 'step': 10745, 'epoch': 2} {'type': 'loss', 'content': 0.1215624064207077, 'timestamp': '2025-09-10 02:48:02.993835', 'step': 10746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:03.049491', 'step': 10746, 'epoch': 2} {'type': 'loss', 'content': 0.11300169676542282, 'timestamp': '2025-09-10 02:48:03.051676', 'step': 10747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:03.106367', 'step': 10747, 'epoch': 2} {'type': 'loss', 'content': 0.16219070553779602, 'timestamp': '2025-09-10 02:48:03.112716', 'step': 10748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:03.167389', 'step': 10748, 'epoch': 2} {'type': 'loss', 'content': 0.12214874476194382, 'timestamp': '2025-09-10 02:48:03.169567', 'step': 10749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:03.223679', 'step': 10749, 'epoch': 2} {'type': 'loss', 'content': 0.26868611574172974, 'timestamp': '2025-09-10 02:48:03.225853', 'step': 10750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:03.279820', 'step': 10750, 'epoch': 2} {'type': 'loss', 'content': 0.1389882117509842, 'timestamp': '2025-09-10 02:48:03.282080', 'step': 10751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:03.336985', 'step': 10751, 'epoch': 2} {'type': 'loss', 'content': 0.01731443777680397, 'timestamp': '2025-09-10 02:48:03.343237', 'step': 10752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:03.401782', 'step': 10752, 'epoch': 2} {'type': 'loss', 'content': 0.20040522515773773, 'timestamp': '2025-09-10 02:48:03.403823', 'step': 10753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:03.459065', 'step': 10753, 'epoch': 2} {'type': 'loss', 'content': 0.1710684895515442, 'timestamp': '2025-09-10 02:48:03.461292', 'step': 10754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:03.515694', 'step': 10754, 'epoch': 2} {'type': 'loss', 'content': 0.09184679388999939, 'timestamp': '2025-09-10 02:48:03.517842', 'step': 10755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:03.572520', 'step': 10755, 'epoch': 2} {'type': 'loss', 'content': 0.10839428007602692, 'timestamp': '2025-09-10 02:48:03.583465', 'step': 10756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:03.637296', 'step': 10756, 'epoch': 2} {'type': 'loss', 'content': 0.11328386515378952, 'timestamp': '2025-09-10 02:48:03.639461', 'step': 10757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:03.692406', 'step': 10757, 'epoch': 2} {'type': 'loss', 'content': 0.10438045859336853, 'timestamp': '2025-09-10 02:48:03.694191', 'step': 10758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:03.749881', 'step': 10758, 'epoch': 2} {'type': 'loss', 'content': 0.09688637405633926, 'timestamp': '2025-09-10 02:48:03.752246', 'step': 10759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:03.805574', 'step': 10759, 'epoch': 2} {'type': 'loss', 'content': 0.12881077826023102, 'timestamp': '2025-09-10 02:48:03.816554', 'step': 10760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:03.872128', 'step': 10760, 'epoch': 2} {'type': 'loss', 'content': 0.11183781176805496, 'timestamp': '2025-09-10 02:48:03.875687', 'step': 10761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:03.931513', 'step': 10761, 'epoch': 2} {'type': 'loss', 'content': 0.19538146257400513, 'timestamp': '2025-09-10 02:48:03.933488', 'step': 10762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:03.987158', 'step': 10762, 'epoch': 2} {'type': 'loss', 'content': 0.12747487425804138, 'timestamp': '2025-09-10 02:48:03.989325', 'step': 10763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:04.043309', 'step': 10763, 'epoch': 2} {'type': 'loss', 'content': 0.08363789319992065, 'timestamp': '2025-09-10 02:48:04.049297', 'step': 10764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:04.102421', 'step': 10764, 'epoch': 2} {'type': 'loss', 'content': 0.23493769764900208, 'timestamp': '2025-09-10 02:48:04.104459', 'step': 10765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:04.157978', 'step': 10765, 'epoch': 2} {'type': 'loss', 'content': 0.08412639796733856, 'timestamp': '2025-09-10 02:48:04.159982', 'step': 10766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:04.213216', 'step': 10766, 'epoch': 2} {'type': 'loss', 'content': 0.13770723342895508, 'timestamp': '2025-09-10 02:48:04.215452', 'step': 10767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:04.269735', 'step': 10767, 'epoch': 2} {'type': 'loss', 'content': 0.12638115882873535, 'timestamp': '2025-09-10 02:48:04.275852', 'step': 10768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:04.329148', 'step': 10768, 'epoch': 2} {'type': 'loss', 'content': 0.1277812421321869, 'timestamp': '2025-09-10 02:48:04.331127', 'step': 10769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:04.383910', 'step': 10769, 'epoch': 2} {'type': 'loss', 'content': 0.11234790831804276, 'timestamp': '2025-09-10 02:48:04.385933', 'step': 10770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:04.439173', 'step': 10770, 'epoch': 2} {'type': 'loss', 'content': 0.18112973868846893, 'timestamp': '2025-09-10 02:48:04.441305', 'step': 10771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:04.494098', 'step': 10771, 'epoch': 2} {'type': 'loss', 'content': 0.12709344923496246, 'timestamp': '2025-09-10 02:48:04.500247', 'step': 10772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:04.552511', 'step': 10772, 'epoch': 2} {'type': 'loss', 'content': 0.12437763810157776, 'timestamp': '2025-09-10 02:48:04.554483', 'step': 10773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:04.607716', 'step': 10773, 'epoch': 2} {'type': 'loss', 'content': 0.11909560859203339, 'timestamp': '2025-09-10 02:48:04.609778', 'step': 10774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:04.663510', 'step': 10774, 'epoch': 2} {'type': 'loss', 'content': 0.11449946463108063, 'timestamp': '2025-09-10 02:48:04.665524', 'step': 10775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:04.718910', 'step': 10775, 'epoch': 2} {'type': 'loss', 'content': 0.11071299016475677, 'timestamp': '2025-09-10 02:48:04.725131', 'step': 10776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:04.778545', 'step': 10776, 'epoch': 2} {'type': 'loss', 'content': 0.09174904972314835, 'timestamp': '2025-09-10 02:48:04.780603', 'step': 10777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:04.834846', 'step': 10777, 'epoch': 2} {'type': 'loss', 'content': 0.1673581302165985, 'timestamp': '2025-09-10 02:48:04.837095', 'step': 10778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:04.890430', 'step': 10778, 'epoch': 2} {'type': 'loss', 'content': 0.17006106674671173, 'timestamp': '2025-09-10 02:48:04.892505', 'step': 10779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:04.946509', 'step': 10779, 'epoch': 2} {'type': 'loss', 'content': 0.10566587746143341, 'timestamp': '2025-09-10 02:48:04.952433', 'step': 10780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:05.005098', 'step': 10780, 'epoch': 2} {'type': 'loss', 'content': 0.14873898029327393, 'timestamp': '2025-09-10 02:48:05.007474', 'step': 10781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:05.060431', 'step': 10781, 'epoch': 2} {'type': 'loss', 'content': 0.05298875644803047, 'timestamp': '2025-09-10 02:48:05.062725', 'step': 10782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:05.117125', 'step': 10782, 'epoch': 2} {'type': 'loss', 'content': 0.06694924086332321, 'timestamp': '2025-09-10 02:48:05.119405', 'step': 10783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:05.172709', 'step': 10783, 'epoch': 2} {'type': 'loss', 'content': 0.10260628163814545, 'timestamp': '2025-09-10 02:48:05.178888', 'step': 10784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:05.232613', 'step': 10784, 'epoch': 2} {'type': 'loss', 'content': 0.23804181814193726, 'timestamp': '2025-09-10 02:48:05.235043', 'step': 10785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:05.288450', 'step': 10785, 'epoch': 2} {'type': 'loss', 'content': 0.1052517369389534, 'timestamp': '2025-09-10 02:48:05.290934', 'step': 10786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:05.346692', 'step': 10786, 'epoch': 2} {'type': 'loss', 'content': 0.14420086145401, 'timestamp': '2025-09-10 02:48:05.348908', 'step': 10787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:05.402705', 'step': 10787, 'epoch': 2} {'type': 'loss', 'content': 0.1680610179901123, 'timestamp': '2025-09-10 02:48:05.408778', 'step': 10788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:05.461847', 'step': 10788, 'epoch': 2} {'type': 'loss', 'content': 0.20174838602542877, 'timestamp': '2025-09-10 02:48:05.464072', 'step': 10789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:05.521496', 'step': 10789, 'epoch': 2} {'type': 'loss', 'content': 0.11615345627069473, 'timestamp': '2025-09-10 02:48:05.523741', 'step': 10790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:05.577216', 'step': 10790, 'epoch': 2} {'type': 'loss', 'content': 0.07240164279937744, 'timestamp': '2025-09-10 02:48:05.579463', 'step': 10791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:05.633893', 'step': 10791, 'epoch': 2} {'type': 'loss', 'content': 0.2187633514404297, 'timestamp': '2025-09-10 02:48:05.640646', 'step': 10792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:05.694869', 'step': 10792, 'epoch': 2} {'type': 'loss', 'content': 0.07249309122562408, 'timestamp': '2025-09-10 02:48:05.697033', 'step': 10793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:05.751351', 'step': 10793, 'epoch': 2} {'type': 'loss', 'content': 0.18755808472633362, 'timestamp': '2025-09-10 02:48:05.753590', 'step': 10794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:05.807703', 'step': 10794, 'epoch': 2} {'type': 'loss', 'content': 0.08522365987300873, 'timestamp': '2025-09-10 02:48:05.810150', 'step': 10795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:05.863173', 'step': 10795, 'epoch': 2} {'type': 'loss', 'content': 0.1425163298845291, 'timestamp': '2025-09-10 02:48:05.869043', 'step': 10796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:05.922062', 'step': 10796, 'epoch': 2} {'type': 'loss', 'content': 0.09196678549051285, 'timestamp': '2025-09-10 02:48:05.924227', 'step': 10797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:05.977995', 'step': 10797, 'epoch': 2} {'type': 'loss', 'content': 0.09657099843025208, 'timestamp': '2025-09-10 02:48:05.980196', 'step': 10798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:06.035896', 'step': 10798, 'epoch': 2} {'type': 'loss', 'content': 0.2096768617630005, 'timestamp': '2025-09-10 02:48:06.038274', 'step': 10799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:48:06.109683', 'step': 10799, 'epoch': 2} {'type': 'loss', 'content': 0.10841713100671768, 'timestamp': '2025-09-10 02:48:06.122962', 'step': 10800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:06.176846', 'step': 10800, 'epoch': 2} {'type': 'loss', 'content': 0.23108558356761932, 'timestamp': '2025-09-10 02:48:06.179106', 'step': 10801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:06.232984', 'step': 10801, 'epoch': 2} {'type': 'loss', 'content': 0.2348235845565796, 'timestamp': '2025-09-10 02:48:06.235192', 'step': 10802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:06.289549', 'step': 10802, 'epoch': 2} {'type': 'loss', 'content': 0.1530996561050415, 'timestamp': '2025-09-10 02:48:06.291720', 'step': 10803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:06.346024', 'step': 10803, 'epoch': 2} {'type': 'loss', 'content': 0.17021393775939941, 'timestamp': '2025-09-10 02:48:06.352226', 'step': 10804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:06.406336', 'step': 10804, 'epoch': 2} {'type': 'loss', 'content': 0.12110047042369843, 'timestamp': '2025-09-10 02:48:06.408640', 'step': 10805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:06.464730', 'step': 10805, 'epoch': 2} {'type': 'loss', 'content': 0.1488836109638214, 'timestamp': '2025-09-10 02:48:06.467454', 'step': 10806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:06.521423', 'step': 10806, 'epoch': 2} {'type': 'loss', 'content': 0.13044053316116333, 'timestamp': '2025-09-10 02:48:06.523531', 'step': 10807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:06.576625', 'step': 10807, 'epoch': 2} {'type': 'loss', 'content': 0.08871028572320938, 'timestamp': '2025-09-10 02:48:06.582712', 'step': 10808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:06.636615', 'step': 10808, 'epoch': 2} {'type': 'loss', 'content': 0.2176654040813446, 'timestamp': '2025-09-10 02:48:06.638755', 'step': 10809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:06.692791', 'step': 10809, 'epoch': 2} {'type': 'loss', 'content': 0.1174514964222908, 'timestamp': '2025-09-10 02:48:06.695116', 'step': 10810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:06.749283', 'step': 10810, 'epoch': 2} {'type': 'loss', 'content': 0.11170351505279541, 'timestamp': '2025-09-10 02:48:06.751023', 'step': 10811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:06.804249', 'step': 10811, 'epoch': 2} {'type': 'loss', 'content': 0.11633240431547165, 'timestamp': '2025-09-10 02:48:06.810020', 'step': 10812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:06.862330', 'step': 10812, 'epoch': 2} {'type': 'loss', 'content': 0.11150696873664856, 'timestamp': '2025-09-10 02:48:06.864357', 'step': 10813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:06.916937', 'step': 10813, 'epoch': 2} {'type': 'loss', 'content': 0.09616348147392273, 'timestamp': '2025-09-10 02:48:06.919245', 'step': 10814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:06.972840', 'step': 10814, 'epoch': 2} {'type': 'loss', 'content': 0.22519703209400177, 'timestamp': '2025-09-10 02:48:06.975099', 'step': 10815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:07.028503', 'step': 10815, 'epoch': 2} {'type': 'loss', 'content': 0.13057468831539154, 'timestamp': '2025-09-10 02:48:07.036044', 'step': 10816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:07.090353', 'step': 10816, 'epoch': 2} {'type': 'loss', 'content': 0.10570798069238663, 'timestamp': '2025-09-10 02:48:07.092523', 'step': 10817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:07.146870', 'step': 10817, 'epoch': 2} {'type': 'loss', 'content': 0.09552253037691116, 'timestamp': '2025-09-10 02:48:07.148786', 'step': 10818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:07.204313', 'step': 10818, 'epoch': 2} {'type': 'loss', 'content': 0.0695212259888649, 'timestamp': '2025-09-10 02:48:07.206360', 'step': 10819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:07.264758', 'step': 10819, 'epoch': 2} {'type': 'loss', 'content': 0.160335972905159, 'timestamp': '2025-09-10 02:48:07.270735', 'step': 10820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:07.325898', 'step': 10820, 'epoch': 2} {'type': 'loss', 'content': 0.15242810547351837, 'timestamp': '2025-09-10 02:48:07.328057', 'step': 10821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:07.383245', 'step': 10821, 'epoch': 2} {'type': 'loss', 'content': 0.13265639543533325, 'timestamp': '2025-09-10 02:48:07.385480', 'step': 10822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:07.439746', 'step': 10822, 'epoch': 2} {'type': 'loss', 'content': 0.07037529349327087, 'timestamp': '2025-09-10 02:48:07.441702', 'step': 10823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:07.497164', 'step': 10823, 'epoch': 2} {'type': 'loss', 'content': 0.08870378136634827, 'timestamp': '2025-09-10 02:48:07.503269', 'step': 10824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:07.557227', 'step': 10824, 'epoch': 2} {'type': 'loss', 'content': 0.21613189578056335, 'timestamp': '2025-09-10 02:48:07.559248', 'step': 10825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:07.612765', 'step': 10825, 'epoch': 2} {'type': 'loss', 'content': 0.08212439715862274, 'timestamp': '2025-09-10 02:48:07.614767', 'step': 10826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:07.668718', 'step': 10826, 'epoch': 2} {'type': 'loss', 'content': 0.1414210945367813, 'timestamp': '2025-09-10 02:48:07.670645', 'step': 10827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:07.725023', 'step': 10827, 'epoch': 2} {'type': 'loss', 'content': 0.12833984196186066, 'timestamp': '2025-09-10 02:48:07.731241', 'step': 10828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:07.784225', 'step': 10828, 'epoch': 2} {'type': 'loss', 'content': 0.1583511382341385, 'timestamp': '2025-09-10 02:48:07.786230', 'step': 10829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:07.841902', 'step': 10829, 'epoch': 2} {'type': 'loss', 'content': 0.10200148075819016, 'timestamp': '2025-09-10 02:48:07.843971', 'step': 10830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:07.901850', 'step': 10830, 'epoch': 2} {'type': 'loss', 'content': 0.19785574078559875, 'timestamp': '2025-09-10 02:48:07.903854', 'step': 10831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:07.961303', 'step': 10831, 'epoch': 2} {'type': 'loss', 'content': 0.11194062978029251, 'timestamp': '2025-09-10 02:48:07.967440', 'step': 10832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:08.025679', 'step': 10832, 'epoch': 2} {'type': 'loss', 'content': 0.10255152732133865, 'timestamp': '2025-09-10 02:48:08.027709', 'step': 10833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:08.085237', 'step': 10833, 'epoch': 2} {'type': 'loss', 'content': 0.1419982612133026, 'timestamp': '2025-09-10 02:48:08.087208', 'step': 10834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:08.143561', 'step': 10834, 'epoch': 2} {'type': 'loss', 'content': 0.10753233730792999, 'timestamp': '2025-09-10 02:48:08.150251', 'step': 10835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:08.209707', 'step': 10835, 'epoch': 2} {'type': 'loss', 'content': 0.19873781502246857, 'timestamp': '2025-09-10 02:48:08.215840', 'step': 10836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:08.280561', 'step': 10836, 'epoch': 2} {'type': 'loss', 'content': 0.11209288984537125, 'timestamp': '2025-09-10 02:48:08.282818', 'step': 10837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:08.337011', 'step': 10837, 'epoch': 2} {'type': 'loss', 'content': 0.1322859227657318, 'timestamp': '2025-09-10 02:48:08.338966', 'step': 10838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:08.395174', 'step': 10838, 'epoch': 2} {'type': 'loss', 'content': 0.10127270221710205, 'timestamp': '2025-09-10 02:48:08.397215', 'step': 10839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:08.456928', 'step': 10839, 'epoch': 2} {'type': 'loss', 'content': 0.1577857881784439, 'timestamp': '2025-09-10 02:48:08.463013', 'step': 10840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:08.516320', 'step': 10840, 'epoch': 2} {'type': 'loss', 'content': 0.1268243044614792, 'timestamp': '2025-09-10 02:48:08.518354', 'step': 10841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:08.575282', 'step': 10841, 'epoch': 2} {'type': 'loss', 'content': 0.1762005090713501, 'timestamp': '2025-09-10 02:48:08.577419', 'step': 10842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:08.633165', 'step': 10842, 'epoch': 2} {'type': 'loss', 'content': 0.18100586533546448, 'timestamp': '2025-09-10 02:48:08.635465', 'step': 10843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:08.692286', 'step': 10843, 'epoch': 2} {'type': 'loss', 'content': 0.07626884430646896, 'timestamp': '2025-09-10 02:48:08.698316', 'step': 10844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:08.752995', 'step': 10844, 'epoch': 2} {'type': 'loss', 'content': 0.1598738133907318, 'timestamp': '2025-09-10 02:48:08.754998', 'step': 10845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:08.810878', 'step': 10845, 'epoch': 2} {'type': 'loss', 'content': 0.14205533266067505, 'timestamp': '2025-09-10 02:48:08.812972', 'step': 10846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:08.866701', 'step': 10846, 'epoch': 2} {'type': 'loss', 'content': 0.10374990105628967, 'timestamp': '2025-09-10 02:48:08.869762', 'step': 10847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:08.932659', 'step': 10847, 'epoch': 2} {'type': 'loss', 'content': 0.08013036102056503, 'timestamp': '2025-09-10 02:48:08.938840', 'step': 10848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:08.993757', 'step': 10848, 'epoch': 2} {'type': 'loss', 'content': 0.1027774065732956, 'timestamp': '2025-09-10 02:48:08.995683', 'step': 10849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:09.050752', 'step': 10849, 'epoch': 2} {'type': 'loss', 'content': 0.18999898433685303, 'timestamp': '2025-09-10 02:48:09.052707', 'step': 10850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:09.108213', 'step': 10850, 'epoch': 2} {'type': 'loss', 'content': 0.14701972901821136, 'timestamp': '2025-09-10 02:48:09.110290', 'step': 10851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:09.164131', 'step': 10851, 'epoch': 2} {'type': 'loss', 'content': 0.1608530580997467, 'timestamp': '2025-09-10 02:48:09.170216', 'step': 10852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:09.226730', 'step': 10852, 'epoch': 2} {'type': 'loss', 'content': 0.10335186868906021, 'timestamp': '2025-09-10 02:48:09.228698', 'step': 10853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:09.283275', 'step': 10853, 'epoch': 2} {'type': 'loss', 'content': 0.09160319715738297, 'timestamp': '2025-09-10 02:48:09.285339', 'step': 10854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:09.339894', 'step': 10854, 'epoch': 2} {'type': 'loss', 'content': 0.06851858645677567, 'timestamp': '2025-09-10 02:48:09.341829', 'step': 10855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:09.405971', 'step': 10855, 'epoch': 2} {'type': 'loss', 'content': 0.11996302753686905, 'timestamp': '2025-09-10 02:48:09.412156', 'step': 10856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:09.468915', 'step': 10856, 'epoch': 2} {'type': 'loss', 'content': 0.15540967881679535, 'timestamp': '2025-09-10 02:48:09.471084', 'step': 10857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:09.525689', 'step': 10857, 'epoch': 2} {'type': 'loss', 'content': 0.15811829268932343, 'timestamp': '2025-09-10 02:48:09.527801', 'step': 10858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:09.583400', 'step': 10858, 'epoch': 2} {'type': 'loss', 'content': 0.19205085933208466, 'timestamp': '2025-09-10 02:48:09.585652', 'step': 10859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:09.643046', 'step': 10859, 'epoch': 2} {'type': 'loss', 'content': 0.07303227484226227, 'timestamp': '2025-09-10 02:48:09.650621', 'step': 10860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:09.712014', 'step': 10860, 'epoch': 2} {'type': 'loss', 'content': 0.11503511667251587, 'timestamp': '2025-09-10 02:48:09.713985', 'step': 10861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:09.769009', 'step': 10861, 'epoch': 2} {'type': 'loss', 'content': 0.11267448216676712, 'timestamp': '2025-09-10 02:48:09.770966', 'step': 10862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:09.827888', 'step': 10862, 'epoch': 2} {'type': 'loss', 'content': 0.258510947227478, 'timestamp': '2025-09-10 02:48:09.830322', 'step': 10863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:09.887659', 'step': 10863, 'epoch': 2} {'type': 'loss', 'content': 0.1565408706665039, 'timestamp': '2025-09-10 02:48:09.893896', 'step': 10864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:09.946944', 'step': 10864, 'epoch': 2} {'type': 'loss', 'content': 0.10136348754167557, 'timestamp': '2025-09-10 02:48:09.949161', 'step': 10865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:10.005126', 'step': 10865, 'epoch': 2} {'type': 'loss', 'content': 0.09356154501438141, 'timestamp': '2025-09-10 02:48:10.014901', 'step': 10866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:10.081616', 'step': 10866, 'epoch': 2} {'type': 'loss', 'content': 0.14285293221473694, 'timestamp': '2025-09-10 02:48:10.088319', 'step': 10867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:10.147697', 'step': 10867, 'epoch': 2} {'type': 'loss', 'content': 0.15953396260738373, 'timestamp': '2025-09-10 02:48:10.153707', 'step': 10868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:10.207669', 'step': 10868, 'epoch': 2} {'type': 'loss', 'content': 0.09110426902770996, 'timestamp': '2025-09-10 02:48:10.209621', 'step': 10869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:10.263749', 'step': 10869, 'epoch': 2} {'type': 'loss', 'content': 0.09243103116750717, 'timestamp': '2025-09-10 02:48:10.265763', 'step': 10870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:10.319814', 'step': 10870, 'epoch': 2} {'type': 'loss', 'content': 0.04806648939847946, 'timestamp': '2025-09-10 02:48:10.321954', 'step': 10871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:10.375929', 'step': 10871, 'epoch': 2} {'type': 'loss', 'content': 0.19156281650066376, 'timestamp': '2025-09-10 02:48:10.382145', 'step': 10872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:10.435966', 'step': 10872, 'epoch': 2} {'type': 'loss', 'content': 0.14593155682086945, 'timestamp': '2025-09-10 02:48:10.438050', 'step': 10873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:10.491735', 'step': 10873, 'epoch': 2} {'type': 'loss', 'content': 0.09335773438215256, 'timestamp': '2025-09-10 02:48:10.493759', 'step': 10874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:10.549064', 'step': 10874, 'epoch': 2} {'type': 'loss', 'content': 0.11636039614677429, 'timestamp': '2025-09-10 02:48:10.551061', 'step': 10875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:10.605162', 'step': 10875, 'epoch': 2} {'type': 'loss', 'content': 0.13775762915611267, 'timestamp': '2025-09-10 02:48:10.611663', 'step': 10876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:10.666970', 'step': 10876, 'epoch': 2} {'type': 'loss', 'content': 0.07504823058843613, 'timestamp': '2025-09-10 02:48:10.668932', 'step': 10877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:10.722618', 'step': 10877, 'epoch': 2} {'type': 'loss', 'content': 0.09551258385181427, 'timestamp': '2025-09-10 02:48:10.724630', 'step': 10878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:10.780398', 'step': 10878, 'epoch': 2} {'type': 'loss', 'content': 0.12310247123241425, 'timestamp': '2025-09-10 02:48:10.782365', 'step': 10879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:10.838570', 'step': 10879, 'epoch': 2} {'type': 'loss', 'content': 0.06553320586681366, 'timestamp': '2025-09-10 02:48:10.844621', 'step': 10880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:10.898665', 'step': 10880, 'epoch': 2} {'type': 'loss', 'content': 0.1580280214548111, 'timestamp': '2025-09-10 02:48:10.900625', 'step': 10881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:10.955153', 'step': 10881, 'epoch': 2} {'type': 'loss', 'content': 0.08534148335456848, 'timestamp': '2025-09-10 02:48:10.957292', 'step': 10882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:11.011384', 'step': 10882, 'epoch': 2} {'type': 'loss', 'content': 0.13756844401359558, 'timestamp': '2025-09-10 02:48:11.013521', 'step': 10883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:11.068792', 'step': 10883, 'epoch': 2} {'type': 'loss', 'content': 0.08732322603464127, 'timestamp': '2025-09-10 02:48:11.074758', 'step': 10884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:11.129088', 'step': 10884, 'epoch': 2} {'type': 'loss', 'content': 0.06391478329896927, 'timestamp': '2025-09-10 02:48:11.131020', 'step': 10885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:11.186900', 'step': 10885, 'epoch': 2} {'type': 'loss', 'content': 0.09516045451164246, 'timestamp': '2025-09-10 02:48:11.188873', 'step': 10886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:11.244179', 'step': 10886, 'epoch': 2} {'type': 'loss', 'content': 0.2170359194278717, 'timestamp': '2025-09-10 02:48:11.245935', 'step': 10887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:11.300632', 'step': 10887, 'epoch': 2} {'type': 'loss', 'content': 0.2359507977962494, 'timestamp': '2025-09-10 02:48:11.306713', 'step': 10888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:11.364193', 'step': 10888, 'epoch': 2} {'type': 'loss', 'content': 0.12438911944627762, 'timestamp': '2025-09-10 02:48:11.366340', 'step': 10889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:11.419933', 'step': 10889, 'epoch': 2} {'type': 'loss', 'content': 0.19017061591148376, 'timestamp': '2025-09-10 02:48:11.421983', 'step': 10890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:11.479975', 'step': 10890, 'epoch': 2} {'type': 'loss', 'content': 0.12591201066970825, 'timestamp': '2025-09-10 02:48:11.482075', 'step': 10891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:11.537629', 'step': 10891, 'epoch': 2} {'type': 'loss', 'content': 0.10170786827802658, 'timestamp': '2025-09-10 02:48:11.543819', 'step': 10892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:11.601443', 'step': 10892, 'epoch': 2} {'type': 'loss', 'content': 0.053053632378578186, 'timestamp': '2025-09-10 02:48:11.603256', 'step': 10893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:11.656863', 'step': 10893, 'epoch': 2} {'type': 'loss', 'content': 0.13694345951080322, 'timestamp': '2025-09-10 02:48:11.658797', 'step': 10894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:11.711996', 'step': 10894, 'epoch': 2} {'type': 'loss', 'content': 0.10344608128070831, 'timestamp': '2025-09-10 02:48:11.713736', 'step': 10895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:11.766434', 'step': 10895, 'epoch': 2} {'type': 'loss', 'content': 0.2741551697254181, 'timestamp': '2025-09-10 02:48:11.772463', 'step': 10896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:11.825290', 'step': 10896, 'epoch': 2} {'type': 'loss', 'content': 0.08108164370059967, 'timestamp': '2025-09-10 02:48:11.827281', 'step': 10897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:11.882059', 'step': 10897, 'epoch': 2} {'type': 'loss', 'content': 0.19006557762622833, 'timestamp': '2025-09-10 02:48:11.884057', 'step': 10898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:11.937754', 'step': 10898, 'epoch': 2} {'type': 'loss', 'content': 0.06547939777374268, 'timestamp': '2025-09-10 02:48:11.939873', 'step': 10899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:11.993918', 'step': 10899, 'epoch': 2} {'type': 'loss', 'content': 0.09892997145652771, 'timestamp': '2025-09-10 02:48:12.000228', 'step': 10900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:12.053244', 'step': 10900, 'epoch': 2} {'type': 'loss', 'content': 0.11664703488349915, 'timestamp': '2025-09-10 02:48:12.055042', 'step': 10901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:12.108130', 'step': 10901, 'epoch': 2} {'type': 'loss', 'content': 0.16081108152866364, 'timestamp': '2025-09-10 02:48:12.109901', 'step': 10902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:12.164036', 'step': 10902, 'epoch': 2} {'type': 'loss', 'content': 0.14869660139083862, 'timestamp': '2025-09-10 02:48:12.165788', 'step': 10903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:12.219107', 'step': 10903, 'epoch': 2} {'type': 'loss', 'content': 0.09391423314809799, 'timestamp': '2025-09-10 02:48:12.224909', 'step': 10904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:12.277977', 'step': 10904, 'epoch': 2} {'type': 'loss', 'content': 0.13298752903938293, 'timestamp': '2025-09-10 02:48:12.279991', 'step': 10905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:12.333209', 'step': 10905, 'epoch': 2} {'type': 'loss', 'content': 0.09428905695676804, 'timestamp': '2025-09-10 02:48:12.335246', 'step': 10906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:12.388611', 'step': 10906, 'epoch': 2} {'type': 'loss', 'content': 0.09723635762929916, 'timestamp': '2025-09-10 02:48:12.390590', 'step': 10907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:12.444416', 'step': 10907, 'epoch': 2} {'type': 'loss', 'content': 0.17587290704250336, 'timestamp': '2025-09-10 02:48:12.450382', 'step': 10908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:12.510500', 'step': 10908, 'epoch': 2} {'type': 'loss', 'content': 0.14074598252773285, 'timestamp': '2025-09-10 02:48:12.512685', 'step': 10909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:12.567400', 'step': 10909, 'epoch': 2} {'type': 'loss', 'content': 0.1322881281375885, 'timestamp': '2025-09-10 02:48:12.569194', 'step': 10910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:12.625485', 'step': 10910, 'epoch': 2} {'type': 'loss', 'content': 0.20705069601535797, 'timestamp': '2025-09-10 02:48:12.627230', 'step': 10911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:12.686682', 'step': 10911, 'epoch': 2} {'type': 'loss', 'content': 0.1653110533952713, 'timestamp': '2025-09-10 02:48:12.693095', 'step': 10912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:12.750255', 'step': 10912, 'epoch': 2} {'type': 'loss', 'content': 0.09572098404169083, 'timestamp': '2025-09-10 02:48:12.752286', 'step': 10913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:12.808203', 'step': 10913, 'epoch': 2} {'type': 'loss', 'content': 0.13968534767627716, 'timestamp': '2025-09-10 02:48:12.810316', 'step': 10914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:12.870289', 'step': 10914, 'epoch': 2} {'type': 'loss', 'content': 0.10248100012540817, 'timestamp': '2025-09-10 02:48:12.872500', 'step': 10915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:12.927626', 'step': 10915, 'epoch': 2} {'type': 'loss', 'content': 0.15315474569797516, 'timestamp': '2025-09-10 02:48:12.933667', 'step': 10916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:12.992534', 'step': 10916, 'epoch': 2} {'type': 'loss', 'content': 0.04553857818245888, 'timestamp': '2025-09-10 02:48:12.994301', 'step': 10917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:13.051472', 'step': 10917, 'epoch': 2} {'type': 'loss', 'content': 0.0904921367764473, 'timestamp': '2025-09-10 02:48:13.053403', 'step': 10918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:13.107978', 'step': 10918, 'epoch': 2} {'type': 'loss', 'content': 0.13388283550739288, 'timestamp': '2025-09-10 02:48:13.109760', 'step': 10919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:13.166408', 'step': 10919, 'epoch': 2} {'type': 'loss', 'content': 0.09021127969026566, 'timestamp': '2025-09-10 02:48:13.172591', 'step': 10920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:13.229833', 'step': 10920, 'epoch': 2} {'type': 'loss', 'content': 0.10489711910486221, 'timestamp': '2025-09-10 02:48:13.231845', 'step': 10921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:13.285329', 'step': 10921, 'epoch': 2} {'type': 'loss', 'content': 0.0780898854136467, 'timestamp': '2025-09-10 02:48:13.287353', 'step': 10922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:13.340662', 'step': 10922, 'epoch': 2} {'type': 'loss', 'content': 0.12714126706123352, 'timestamp': '2025-09-10 02:48:13.342634', 'step': 10923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:13.395470', 'step': 10923, 'epoch': 2} {'type': 'loss', 'content': 0.10648178309202194, 'timestamp': '2025-09-10 02:48:13.401517', 'step': 10924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:13.453941', 'step': 10924, 'epoch': 2} {'type': 'loss', 'content': 0.13986095786094666, 'timestamp': '2025-09-10 02:48:13.455737', 'step': 10925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:13.508780', 'step': 10925, 'epoch': 2} {'type': 'loss', 'content': 0.0936514288187027, 'timestamp': '2025-09-10 02:48:13.510589', 'step': 10926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:13.564293', 'step': 10926, 'epoch': 2} {'type': 'loss', 'content': 0.14963367581367493, 'timestamp': '2025-09-10 02:48:13.566417', 'step': 10927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:13.619735', 'step': 10927, 'epoch': 2} {'type': 'loss', 'content': 0.07952989637851715, 'timestamp': '2025-09-10 02:48:13.625715', 'step': 10928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:13.678174', 'step': 10928, 'epoch': 2} {'type': 'loss', 'content': 0.1514047533273697, 'timestamp': '2025-09-10 02:48:13.680299', 'step': 10929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:13.734100', 'step': 10929, 'epoch': 2} {'type': 'loss', 'content': 0.13476260006427765, 'timestamp': '2025-09-10 02:48:13.736026', 'step': 10930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:13.789355', 'step': 10930, 'epoch': 2} {'type': 'loss', 'content': 0.19533175230026245, 'timestamp': '2025-09-10 02:48:13.791354', 'step': 10931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:13.844079', 'step': 10931, 'epoch': 2} {'type': 'loss', 'content': 0.07981114834547043, 'timestamp': '2025-09-10 02:48:13.850127', 'step': 10932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:13.903031', 'step': 10932, 'epoch': 2} {'type': 'loss', 'content': 0.10860636085271835, 'timestamp': '2025-09-10 02:48:13.905141', 'step': 10933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:13.958348', 'step': 10933, 'epoch': 2} {'type': 'loss', 'content': 0.172163724899292, 'timestamp': '2025-09-10 02:48:13.960481', 'step': 10934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:14.013470', 'step': 10934, 'epoch': 2} {'type': 'loss', 'content': 0.15042172372341156, 'timestamp': '2025-09-10 02:48:14.015198', 'step': 10935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:14.067572', 'step': 10935, 'epoch': 2} {'type': 'loss', 'content': 0.23606614768505096, 'timestamp': '2025-09-10 02:48:14.073457', 'step': 10936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:14.126099', 'step': 10936, 'epoch': 2} {'type': 'loss', 'content': 0.09194707125425339, 'timestamp': '2025-09-10 02:48:14.127804', 'step': 10937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:14.180509', 'step': 10937, 'epoch': 2} {'type': 'loss', 'content': 0.12456897646188736, 'timestamp': '2025-09-10 02:48:14.182610', 'step': 10938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:14.235403', 'step': 10938, 'epoch': 2} {'type': 'loss', 'content': 0.12302982062101364, 'timestamp': '2025-09-10 02:48:14.237328', 'step': 10939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:14.291185', 'step': 10939, 'epoch': 2} {'type': 'loss', 'content': 0.15863439440727234, 'timestamp': '2025-09-10 02:48:14.297152', 'step': 10940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:14.349621', 'step': 10940, 'epoch': 2} {'type': 'loss', 'content': 0.11441870033740997, 'timestamp': '2025-09-10 02:48:14.351591', 'step': 10941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:14.405612', 'step': 10941, 'epoch': 2} {'type': 'loss', 'content': 0.20113065838813782, 'timestamp': '2025-09-10 02:48:14.407513', 'step': 10942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:14.460602', 'step': 10942, 'epoch': 2} {'type': 'loss', 'content': 0.17484407126903534, 'timestamp': '2025-09-10 02:48:14.462751', 'step': 10943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:14.516732', 'step': 10943, 'epoch': 2} {'type': 'loss', 'content': 0.1454068422317505, 'timestamp': '2025-09-10 02:48:14.522471', 'step': 10944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:14.575352', 'step': 10944, 'epoch': 2} {'type': 'loss', 'content': 0.12157800048589706, 'timestamp': '2025-09-10 02:48:14.577293', 'step': 10945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:14.630215', 'step': 10945, 'epoch': 2} {'type': 'loss', 'content': 0.09131594747304916, 'timestamp': '2025-09-10 02:48:14.631970', 'step': 10946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:14.684852', 'step': 10946, 'epoch': 2} {'type': 'loss', 'content': 0.13870474696159363, 'timestamp': '2025-09-10 02:48:14.686810', 'step': 10947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:14.741784', 'step': 10947, 'epoch': 2} {'type': 'loss', 'content': 0.09803605824708939, 'timestamp': '2025-09-10 02:48:14.747555', 'step': 10948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:14.799336', 'step': 10948, 'epoch': 2} {'type': 'loss', 'content': 0.1411946564912796, 'timestamp': '2025-09-10 02:48:14.801304', 'step': 10949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:14.855533', 'step': 10949, 'epoch': 2} {'type': 'loss', 'content': 0.09431717544794083, 'timestamp': '2025-09-10 02:48:14.857482', 'step': 10950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:14.911320', 'step': 10950, 'epoch': 2} {'type': 'loss', 'content': 0.10736474394798279, 'timestamp': '2025-09-10 02:48:14.913235', 'step': 10951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:14.966736', 'step': 10951, 'epoch': 2} {'type': 'loss', 'content': 0.11215722560882568, 'timestamp': '2025-09-10 02:48:14.972377', 'step': 10952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:15.025477', 'step': 10952, 'epoch': 2} {'type': 'loss', 'content': 0.05728529766201973, 'timestamp': '2025-09-10 02:48:15.027211', 'step': 10953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:15.080525', 'step': 10953, 'epoch': 2} {'type': 'loss', 'content': 0.15202167630195618, 'timestamp': '2025-09-10 02:48:15.082627', 'step': 10954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:15.135461', 'step': 10954, 'epoch': 2} {'type': 'loss', 'content': 0.13023056089878082, 'timestamp': '2025-09-10 02:48:15.137389', 'step': 10955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:15.189731', 'step': 10955, 'epoch': 2} {'type': 'loss', 'content': 0.3130339980125427, 'timestamp': '2025-09-10 02:48:15.195518', 'step': 10956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:15.248253', 'step': 10956, 'epoch': 2} {'type': 'loss', 'content': 0.07702454179525375, 'timestamp': '2025-09-10 02:48:15.250322', 'step': 10957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:15.303478', 'step': 10957, 'epoch': 2} {'type': 'loss', 'content': 0.16158317029476166, 'timestamp': '2025-09-10 02:48:15.305621', 'step': 10958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:15.359202', 'step': 10958, 'epoch': 2} {'type': 'loss', 'content': 0.13877512514591217, 'timestamp': '2025-09-10 02:48:15.360968', 'step': 10959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:15.414251', 'step': 10959, 'epoch': 2} {'type': 'loss', 'content': 0.10956083238124847, 'timestamp': '2025-09-10 02:48:15.419805', 'step': 10960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:15.472300', 'step': 10960, 'epoch': 2} {'type': 'loss', 'content': 0.1624528467655182, 'timestamp': '2025-09-10 02:48:15.473985', 'step': 10961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:15.526674', 'step': 10961, 'epoch': 2} {'type': 'loss', 'content': 0.1061311885714531, 'timestamp': '2025-09-10 02:48:15.528446', 'step': 10962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:15.580739', 'step': 10962, 'epoch': 2} {'type': 'loss', 'content': 0.07653339207172394, 'timestamp': '2025-09-10 02:48:15.582920', 'step': 10963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:15.635511', 'step': 10963, 'epoch': 2} {'type': 'loss', 'content': 0.1465078741312027, 'timestamp': '2025-09-10 02:48:15.641344', 'step': 10964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:15.693616', 'step': 10964, 'epoch': 2} {'type': 'loss', 'content': 0.10378549993038177, 'timestamp': '2025-09-10 02:48:15.695625', 'step': 10965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:15.748725', 'step': 10965, 'epoch': 2} {'type': 'loss', 'content': 0.11930112540721893, 'timestamp': '2025-09-10 02:48:15.750776', 'step': 10966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:15.806247', 'step': 10966, 'epoch': 2} {'type': 'loss', 'content': 0.10378009080886841, 'timestamp': '2025-09-10 02:48:15.808111', 'step': 10967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:15.861453', 'step': 10967, 'epoch': 2} {'type': 'loss', 'content': 0.10849880427122116, 'timestamp': '2025-09-10 02:48:15.867133', 'step': 10968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:15.919701', 'step': 10968, 'epoch': 2} {'type': 'loss', 'content': 0.11035297065973282, 'timestamp': '2025-09-10 02:48:15.921623', 'step': 10969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:15.975238', 'step': 10969, 'epoch': 2} {'type': 'loss', 'content': 0.19534046947956085, 'timestamp': '2025-09-10 02:48:15.977501', 'step': 10970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:16.032107', 'step': 10970, 'epoch': 2} {'type': 'loss', 'content': 0.16386419534683228, 'timestamp': '2025-09-10 02:48:16.034520', 'step': 10971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:16.089746', 'step': 10971, 'epoch': 2} {'type': 'loss', 'content': 0.14027845859527588, 'timestamp': '2025-09-10 02:48:16.096342', 'step': 10972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:16.149522', 'step': 10972, 'epoch': 2} {'type': 'loss', 'content': 0.07350046932697296, 'timestamp': '2025-09-10 02:48:16.151699', 'step': 10973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:16.206192', 'step': 10973, 'epoch': 2} {'type': 'loss', 'content': 0.11690066754817963, 'timestamp': '2025-09-10 02:48:16.209737', 'step': 10974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:16.264329', 'step': 10974, 'epoch': 2} {'type': 'loss', 'content': 0.2274176925420761, 'timestamp': '2025-09-10 02:48:16.266299', 'step': 10975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:16.319181', 'step': 10975, 'epoch': 2} {'type': 'loss', 'content': 0.1115437000989914, 'timestamp': '2025-09-10 02:48:16.325190', 'step': 10976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:16.378830', 'step': 10976, 'epoch': 2} {'type': 'loss', 'content': 0.12429061532020569, 'timestamp': '2025-09-10 02:48:16.380809', 'step': 10977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:16.436274', 'step': 10977, 'epoch': 2} {'type': 'loss', 'content': 0.12566164135932922, 'timestamp': '2025-09-10 02:48:16.438315', 'step': 10978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:16.493917', 'step': 10978, 'epoch': 2} {'type': 'loss', 'content': 0.12748722732067108, 'timestamp': '2025-09-10 02:48:16.495859', 'step': 10979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:16.549875', 'step': 10979, 'epoch': 2} {'type': 'loss', 'content': 0.1968621164560318, 'timestamp': '2025-09-10 02:48:16.557300', 'step': 10980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:16.611517', 'step': 10980, 'epoch': 2} {'type': 'loss', 'content': 0.10061978548765182, 'timestamp': '2025-09-10 02:48:16.613651', 'step': 10981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:16.669572', 'step': 10981, 'epoch': 2} {'type': 'loss', 'content': 0.1160176545381546, 'timestamp': '2025-09-10 02:48:16.671504', 'step': 10982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:16.726222', 'step': 10982, 'epoch': 2} {'type': 'loss', 'content': 0.17571242153644562, 'timestamp': '2025-09-10 02:48:16.728190', 'step': 10983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:16.781862', 'step': 10983, 'epoch': 2} {'type': 'loss', 'content': 0.08444692939519882, 'timestamp': '2025-09-10 02:48:16.788000', 'step': 10984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:16.842452', 'step': 10984, 'epoch': 2} {'type': 'loss', 'content': 0.08669538050889969, 'timestamp': '2025-09-10 02:48:16.844494', 'step': 10985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:16.899392', 'step': 10985, 'epoch': 2} {'type': 'loss', 'content': 0.10128474235534668, 'timestamp': '2025-09-10 02:48:16.901534', 'step': 10986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:16.956412', 'step': 10986, 'epoch': 2} {'type': 'loss', 'content': 0.2198631316423416, 'timestamp': '2025-09-10 02:48:16.958517', 'step': 10987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:17.014716', 'step': 10987, 'epoch': 2} {'type': 'loss', 'content': 0.10441436618566513, 'timestamp': '2025-09-10 02:48:17.020984', 'step': 10988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:17.074869', 'step': 10988, 'epoch': 2} {'type': 'loss', 'content': 0.11719085276126862, 'timestamp': '2025-09-10 02:48:17.076779', 'step': 10989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:17.135673', 'step': 10989, 'epoch': 2} {'type': 'loss', 'content': 0.12340109795331955, 'timestamp': '2025-09-10 02:48:17.137765', 'step': 10990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:17.192111', 'step': 10990, 'epoch': 2} {'type': 'loss', 'content': 0.12049161642789841, 'timestamp': '2025-09-10 02:48:17.194100', 'step': 10991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:17.248217', 'step': 10991, 'epoch': 2} {'type': 'loss', 'content': 0.1025625690817833, 'timestamp': '2025-09-10 02:48:17.254268', 'step': 10992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:17.308273', 'step': 10992, 'epoch': 2} {'type': 'loss', 'content': 0.16501013934612274, 'timestamp': '2025-09-10 02:48:17.310230', 'step': 10993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:17.368264', 'step': 10993, 'epoch': 2} {'type': 'loss', 'content': 0.15787966549396515, 'timestamp': '2025-09-10 02:48:17.370357', 'step': 10994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:17.423708', 'step': 10994, 'epoch': 2} {'type': 'loss', 'content': 0.09357354789972305, 'timestamp': '2025-09-10 02:48:17.425679', 'step': 10995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:17.479806', 'step': 10995, 'epoch': 2} {'type': 'loss', 'content': 0.13528288900852203, 'timestamp': '2025-09-10 02:48:17.486239', 'step': 10996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:17.541632', 'step': 10996, 'epoch': 2} {'type': 'loss', 'content': 0.15257470309734344, 'timestamp': '2025-09-10 02:48:17.543600', 'step': 10997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:17.599614', 'step': 10997, 'epoch': 2} {'type': 'loss', 'content': 0.03680109605193138, 'timestamp': '2025-09-10 02:48:17.601702', 'step': 10998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:17.655971', 'step': 10998, 'epoch': 2} {'type': 'loss', 'content': 0.2033080905675888, 'timestamp': '2025-09-10 02:48:17.657912', 'step': 10999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:17.713141', 'step': 10999, 'epoch': 2} {'type': 'loss', 'content': 0.22077639400959015, 'timestamp': '2025-09-10 02:48:17.719281', 'step': 11000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 11000', 'timestamp': '2025-09-10 02:48:18.267651', 'step': 11000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:18.326514', 'step': 11000, 'epoch': 2} {'type': 'loss', 'content': 0.1838795691728592, 'timestamp': '2025-09-10 02:48:18.328628', 'step': 11001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:18.382415', 'step': 11001, 'epoch': 2} {'type': 'loss', 'content': 0.17828631401062012, 'timestamp': '2025-09-10 02:48:18.384380', 'step': 11002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:18.437741', 'step': 11002, 'epoch': 2} {'type': 'loss', 'content': 0.10383734852075577, 'timestamp': '2025-09-10 02:48:18.439754', 'step': 11003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:18.492388', 'step': 11003, 'epoch': 2} {'type': 'loss', 'content': 0.10547586530447006, 'timestamp': '2025-09-10 02:48:18.498366', 'step': 11004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:18.550198', 'step': 11004, 'epoch': 2} {'type': 'loss', 'content': 0.14127790927886963, 'timestamp': '2025-09-10 02:48:18.552179', 'step': 11005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:18.606379', 'step': 11005, 'epoch': 2} {'type': 'loss', 'content': 0.21643124520778656, 'timestamp': '2025-09-10 02:48:18.608384', 'step': 11006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:18.662483', 'step': 11006, 'epoch': 2} {'type': 'loss', 'content': 0.15187984704971313, 'timestamp': '2025-09-10 02:48:18.664450', 'step': 11007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:18.718760', 'step': 11007, 'epoch': 2} {'type': 'loss', 'content': 0.06947283446788788, 'timestamp': '2025-09-10 02:48:18.726177', 'step': 11008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:18.782558', 'step': 11008, 'epoch': 2} {'type': 'loss', 'content': 0.057764336466789246, 'timestamp': '2025-09-10 02:48:18.784552', 'step': 11009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:18.837799', 'step': 11009, 'epoch': 2} {'type': 'loss', 'content': 0.144389808177948, 'timestamp': '2025-09-10 02:48:18.839901', 'step': 11010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:18.893669', 'step': 11010, 'epoch': 2} {'type': 'loss', 'content': 0.11439800262451172, 'timestamp': '2025-09-10 02:48:18.895727', 'step': 11011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:18.950362', 'step': 11011, 'epoch': 2} {'type': 'loss', 'content': 0.0820707455277443, 'timestamp': '2025-09-10 02:48:18.956358', 'step': 11012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:19.010365', 'step': 11012, 'epoch': 2} {'type': 'loss', 'content': 0.08282060921192169, 'timestamp': '2025-09-10 02:48:19.012083', 'step': 11013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:19.067354', 'step': 11013, 'epoch': 2} {'type': 'loss', 'content': 0.08670319616794586, 'timestamp': '2025-09-10 02:48:19.069479', 'step': 11014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:19.125032', 'step': 11014, 'epoch': 2} {'type': 'loss', 'content': 0.14196424186229706, 'timestamp': '2025-09-10 02:48:19.127205', 'step': 11015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:19.182524', 'step': 11015, 'epoch': 2} {'type': 'loss', 'content': 0.12552112340927124, 'timestamp': '2025-09-10 02:48:19.188613', 'step': 11016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:19.241361', 'step': 11016, 'epoch': 2} {'type': 'loss', 'content': 0.12360421568155289, 'timestamp': '2025-09-10 02:48:19.243543', 'step': 11017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:19.297562', 'step': 11017, 'epoch': 2} {'type': 'loss', 'content': 0.1501186043024063, 'timestamp': '2025-09-10 02:48:19.299656', 'step': 11018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:19.353421', 'step': 11018, 'epoch': 2} {'type': 'loss', 'content': 0.20743775367736816, 'timestamp': '2025-09-10 02:48:19.355454', 'step': 11019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:19.409903', 'step': 11019, 'epoch': 2} {'type': 'loss', 'content': 0.0502808652818203, 'timestamp': '2025-09-10 02:48:19.415937', 'step': 11020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:19.469702', 'step': 11020, 'epoch': 2} {'type': 'loss', 'content': 0.16260595619678497, 'timestamp': '2025-09-10 02:48:19.471758', 'step': 11021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:19.525807', 'step': 11021, 'epoch': 2} {'type': 'loss', 'content': 0.16292190551757812, 'timestamp': '2025-09-10 02:48:19.527903', 'step': 11022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:19.581677', 'step': 11022, 'epoch': 2} {'type': 'loss', 'content': 0.1069425493478775, 'timestamp': '2025-09-10 02:48:19.583896', 'step': 11023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:19.639924', 'step': 11023, 'epoch': 2} {'type': 'loss', 'content': 0.11341535300016403, 'timestamp': '2025-09-10 02:48:19.646226', 'step': 11024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:19.700372', 'step': 11024, 'epoch': 2} {'type': 'loss', 'content': 0.09721097350120544, 'timestamp': '2025-09-10 02:48:19.702556', 'step': 11025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:19.756841', 'step': 11025, 'epoch': 2} {'type': 'loss', 'content': 0.16809654235839844, 'timestamp': '2025-09-10 02:48:19.758943', 'step': 11026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:19.812193', 'step': 11026, 'epoch': 2} {'type': 'loss', 'content': 0.041386835277080536, 'timestamp': '2025-09-10 02:48:19.814299', 'step': 11027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:19.868247', 'step': 11027, 'epoch': 2} {'type': 'loss', 'content': 0.083111971616745, 'timestamp': '2025-09-10 02:48:19.874447', 'step': 11028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:19.927603', 'step': 11028, 'epoch': 2} {'type': 'loss', 'content': 0.18237300217151642, 'timestamp': '2025-09-10 02:48:19.930042', 'step': 11029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:19.985029', 'step': 11029, 'epoch': 2} {'type': 'loss', 'content': 0.17493034899234772, 'timestamp': '2025-09-10 02:48:19.987979', 'step': 11030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:20.041620', 'step': 11030, 'epoch': 2} {'type': 'loss', 'content': 0.13807781040668488, 'timestamp': '2025-09-10 02:48:20.043612', 'step': 11031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:20.096294', 'step': 11031, 'epoch': 2} {'type': 'loss', 'content': 0.14597737789154053, 'timestamp': '2025-09-10 02:48:20.102335', 'step': 11032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:20.154963', 'step': 11032, 'epoch': 2} {'type': 'loss', 'content': 0.13934600353240967, 'timestamp': '2025-09-10 02:48:20.156836', 'step': 11033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:20.210238', 'step': 11033, 'epoch': 2} {'type': 'loss', 'content': 0.2164379358291626, 'timestamp': '2025-09-10 02:48:20.212324', 'step': 11034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:20.265749', 'step': 11034, 'epoch': 2} {'type': 'loss', 'content': 0.14744317531585693, 'timestamp': '2025-09-10 02:48:20.267728', 'step': 11035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:20.321171', 'step': 11035, 'epoch': 2} {'type': 'loss', 'content': 0.17545177042484283, 'timestamp': '2025-09-10 02:48:20.327103', 'step': 11036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:20.379744', 'step': 11036, 'epoch': 2} {'type': 'loss', 'content': 0.16595764458179474, 'timestamp': '2025-09-10 02:48:20.381706', 'step': 11037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:20.435036', 'step': 11037, 'epoch': 2} {'type': 'loss', 'content': 0.15310202538967133, 'timestamp': '2025-09-10 02:48:20.437019', 'step': 11038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:20.490506', 'step': 11038, 'epoch': 2} {'type': 'loss', 'content': 0.07402925938367844, 'timestamp': '2025-09-10 02:48:20.492466', 'step': 11039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:20.545341', 'step': 11039, 'epoch': 2} {'type': 'loss', 'content': 0.08682185411453247, 'timestamp': '2025-09-10 02:48:20.551217', 'step': 11040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:20.603894', 'step': 11040, 'epoch': 2} {'type': 'loss', 'content': 0.10182804614305496, 'timestamp': '2025-09-10 02:48:20.605904', 'step': 11041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:20.659345', 'step': 11041, 'epoch': 2} {'type': 'loss', 'content': 0.09056837856769562, 'timestamp': '2025-09-10 02:48:20.661410', 'step': 11042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:20.715108', 'step': 11042, 'epoch': 2} {'type': 'loss', 'content': 0.07376908510923386, 'timestamp': '2025-09-10 02:48:20.717440', 'step': 11043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:20.772736', 'step': 11043, 'epoch': 2} {'type': 'loss', 'content': 0.1589718610048294, 'timestamp': '2025-09-10 02:48:20.778999', 'step': 11044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:20.832410', 'step': 11044, 'epoch': 2} {'type': 'loss', 'content': 0.17104819416999817, 'timestamp': '2025-09-10 02:48:20.834506', 'step': 11045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:20.887840', 'step': 11045, 'epoch': 2} {'type': 'loss', 'content': 0.23561297357082367, 'timestamp': '2025-09-10 02:48:20.889797', 'step': 11046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:20.942549', 'step': 11046, 'epoch': 2} {'type': 'loss', 'content': 0.08228020370006561, 'timestamp': '2025-09-10 02:48:20.944470', 'step': 11047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:20.997815', 'step': 11047, 'epoch': 2} {'type': 'loss', 'content': 0.09679261595010757, 'timestamp': '2025-09-10 02:48:21.003675', 'step': 11048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:21.056118', 'step': 11048, 'epoch': 2} {'type': 'loss', 'content': 0.13382543623447418, 'timestamp': '2025-09-10 02:48:21.058230', 'step': 11049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:21.115004', 'step': 11049, 'epoch': 2} {'type': 'loss', 'content': 0.15996460616588593, 'timestamp': '2025-09-10 02:48:21.116997', 'step': 11050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:21.175838', 'step': 11050, 'epoch': 2} {'type': 'loss', 'content': 0.0721026286482811, 'timestamp': '2025-09-10 02:48:21.177834', 'step': 11051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:21.232710', 'step': 11051, 'epoch': 2} {'type': 'loss', 'content': 0.09155836701393127, 'timestamp': '2025-09-10 02:48:21.238812', 'step': 11052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:21.292843', 'step': 11052, 'epoch': 2} {'type': 'loss', 'content': 0.07893598824739456, 'timestamp': '2025-09-10 02:48:21.294725', 'step': 11053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:21.352879', 'step': 11053, 'epoch': 2} {'type': 'loss', 'content': 0.15999466180801392, 'timestamp': '2025-09-10 02:48:21.354846', 'step': 11054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:21.410346', 'step': 11054, 'epoch': 2} {'type': 'loss', 'content': 0.14883364737033844, 'timestamp': '2025-09-10 02:48:21.412280', 'step': 11055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:21.467453', 'step': 11055, 'epoch': 2} {'type': 'loss', 'content': 0.05854131281375885, 'timestamp': '2025-09-10 02:48:21.473555', 'step': 11056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:21.527385', 'step': 11056, 'epoch': 2} {'type': 'loss', 'content': 0.1691865622997284, 'timestamp': '2025-09-10 02:48:21.529493', 'step': 11057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:21.582480', 'step': 11057, 'epoch': 2} {'type': 'loss', 'content': 0.06667964905500412, 'timestamp': '2025-09-10 02:48:21.584725', 'step': 11058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:21.637904', 'step': 11058, 'epoch': 2} {'type': 'loss', 'content': 0.0874037966132164, 'timestamp': '2025-09-10 02:48:21.639863', 'step': 11059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:21.693244', 'step': 11059, 'epoch': 2} {'type': 'loss', 'content': 0.16933812201023102, 'timestamp': '2025-09-10 02:48:21.699380', 'step': 11060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:21.754972', 'step': 11060, 'epoch': 2} {'type': 'loss', 'content': 0.15986166894435883, 'timestamp': '2025-09-10 02:48:21.757163', 'step': 11061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:21.810403', 'step': 11061, 'epoch': 2} {'type': 'loss', 'content': 0.07733599841594696, 'timestamp': '2025-09-10 02:48:21.812384', 'step': 11062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:21.867564', 'step': 11062, 'epoch': 2} {'type': 'loss', 'content': 0.07099341601133347, 'timestamp': '2025-09-10 02:48:21.869587', 'step': 11063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:21.922907', 'step': 11063, 'epoch': 2} {'type': 'loss', 'content': 0.14876709878444672, 'timestamp': '2025-09-10 02:48:21.928903', 'step': 11064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:21.981267', 'step': 11064, 'epoch': 2} {'type': 'loss', 'content': 0.07354710251092911, 'timestamp': '2025-09-10 02:48:21.983230', 'step': 11065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:22.036635', 'step': 11065, 'epoch': 2} {'type': 'loss', 'content': 0.07841206341981888, 'timestamp': '2025-09-10 02:48:22.038616', 'step': 11066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:22.091818', 'step': 11066, 'epoch': 2} {'type': 'loss', 'content': 0.25759994983673096, 'timestamp': '2025-09-10 02:48:22.093969', 'step': 11067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:22.146536', 'step': 11067, 'epoch': 2} {'type': 'loss', 'content': 0.17444869875907898, 'timestamp': '2025-09-10 02:48:22.152325', 'step': 11068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:22.205005', 'step': 11068, 'epoch': 2} {'type': 'loss', 'content': 0.13186460733413696, 'timestamp': '2025-09-10 02:48:22.206946', 'step': 11069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:22.260342', 'step': 11069, 'epoch': 2} {'type': 'loss', 'content': 0.11276841163635254, 'timestamp': '2025-09-10 02:48:22.262456', 'step': 11070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:22.318927', 'step': 11070, 'epoch': 2} {'type': 'loss', 'content': 0.23523294925689697, 'timestamp': '2025-09-10 02:48:22.321157', 'step': 11071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:22.379063', 'step': 11071, 'epoch': 2} {'type': 'loss', 'content': 0.16158470511436462, 'timestamp': '2025-09-10 02:48:22.385384', 'step': 11072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:22.441356', 'step': 11072, 'epoch': 2} {'type': 'loss', 'content': 0.1337868869304657, 'timestamp': '2025-09-10 02:48:22.443299', 'step': 11073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:22.498978', 'step': 11073, 'epoch': 2} {'type': 'loss', 'content': 0.19079464673995972, 'timestamp': '2025-09-10 02:48:22.500945', 'step': 11074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:22.554255', 'step': 11074, 'epoch': 2} {'type': 'loss', 'content': 0.05366869270801544, 'timestamp': '2025-09-10 02:48:22.556420', 'step': 11075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:22.609807', 'step': 11075, 'epoch': 2} {'type': 'loss', 'content': 0.16183994710445404, 'timestamp': '2025-09-10 02:48:22.615763', 'step': 11076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:22.667729', 'step': 11076, 'epoch': 2} {'type': 'loss', 'content': 0.08051589876413345, 'timestamp': '2025-09-10 02:48:22.669714', 'step': 11077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:22.724050', 'step': 11077, 'epoch': 2} {'type': 'loss', 'content': 0.13189978897571564, 'timestamp': '2025-09-10 02:48:22.726070', 'step': 11078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:22.780214', 'step': 11078, 'epoch': 2} {'type': 'loss', 'content': 0.13760419189929962, 'timestamp': '2025-09-10 02:48:22.782346', 'step': 11079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:22.835441', 'step': 11079, 'epoch': 2} {'type': 'loss', 'content': 0.17758822441101074, 'timestamp': '2025-09-10 02:48:22.841546', 'step': 11080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:22.896149', 'step': 11080, 'epoch': 2} {'type': 'loss', 'content': 0.12956729531288147, 'timestamp': '2025-09-10 02:48:22.898184', 'step': 11081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:22.954116', 'step': 11081, 'epoch': 2} {'type': 'loss', 'content': 0.09918536245822906, 'timestamp': '2025-09-10 02:48:22.956071', 'step': 11082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:23.011002', 'step': 11082, 'epoch': 2} {'type': 'loss', 'content': 0.11441487818956375, 'timestamp': '2025-09-10 02:48:23.012999', 'step': 11083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:23.065783', 'step': 11083, 'epoch': 2} {'type': 'loss', 'content': 0.1818786859512329, 'timestamp': '2025-09-10 02:48:23.071866', 'step': 11084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:23.128602', 'step': 11084, 'epoch': 2} {'type': 'loss', 'content': 0.14634472131729126, 'timestamp': '2025-09-10 02:48:23.130478', 'step': 11085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:23.183264', 'step': 11085, 'epoch': 2} {'type': 'loss', 'content': 0.05333448201417923, 'timestamp': '2025-09-10 02:48:23.185330', 'step': 11086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:23.238475', 'step': 11086, 'epoch': 2} {'type': 'loss', 'content': 0.06931553035974503, 'timestamp': '2025-09-10 02:48:23.240517', 'step': 11087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:23.292941', 'step': 11087, 'epoch': 2} {'type': 'loss', 'content': 0.10212313383817673, 'timestamp': '2025-09-10 02:48:23.298786', 'step': 11088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:23.350883', 'step': 11088, 'epoch': 2} {'type': 'loss', 'content': 0.17509377002716064, 'timestamp': '2025-09-10 02:48:23.353014', 'step': 11089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:23.406210', 'step': 11089, 'epoch': 2} {'type': 'loss', 'content': 0.1370539665222168, 'timestamp': '2025-09-10 02:48:23.408185', 'step': 11090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:23.461946', 'step': 11090, 'epoch': 2} {'type': 'loss', 'content': 0.1444668471813202, 'timestamp': '2025-09-10 02:48:23.463906', 'step': 11091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:23.517144', 'step': 11091, 'epoch': 2} {'type': 'loss', 'content': 0.16126275062561035, 'timestamp': '2025-09-10 02:48:23.523091', 'step': 11092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:23.575670', 'step': 11092, 'epoch': 2} {'type': 'loss', 'content': 0.19438251852989197, 'timestamp': '2025-09-10 02:48:23.577630', 'step': 11093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:23.630856', 'step': 11093, 'epoch': 2} {'type': 'loss', 'content': 0.12422817200422287, 'timestamp': '2025-09-10 02:48:23.632965', 'step': 11094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:23.688013', 'step': 11094, 'epoch': 2} {'type': 'loss', 'content': 0.08401889353990555, 'timestamp': '2025-09-10 02:48:23.691184', 'step': 11095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:23.745016', 'step': 11095, 'epoch': 2} {'type': 'loss', 'content': 0.15938320755958557, 'timestamp': '2025-09-10 02:48:23.750984', 'step': 11096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:23.808392', 'step': 11096, 'epoch': 2} {'type': 'loss', 'content': 0.13238824903964996, 'timestamp': '2025-09-10 02:48:23.810438', 'step': 11097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:23.864815', 'step': 11097, 'epoch': 2} {'type': 'loss', 'content': 0.11715381592512131, 'timestamp': '2025-09-10 02:48:23.866775', 'step': 11098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:23.922455', 'step': 11098, 'epoch': 2} {'type': 'loss', 'content': 0.12442835420370102, 'timestamp': '2025-09-10 02:48:23.924433', 'step': 11099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:23.990222', 'step': 11099, 'epoch': 2} {'type': 'loss', 'content': 0.17446009814739227, 'timestamp': '2025-09-10 02:48:23.996478', 'step': 11100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:24.051081', 'step': 11100, 'epoch': 2} {'type': 'loss', 'content': 0.14080919325351715, 'timestamp': '2025-09-10 02:48:24.053121', 'step': 11101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:24.108352', 'step': 11101, 'epoch': 2} {'type': 'loss', 'content': 0.13205590844154358, 'timestamp': '2025-09-10 02:48:24.110293', 'step': 11102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:24.163282', 'step': 11102, 'epoch': 2} {'type': 'loss', 'content': 0.11546433717012405, 'timestamp': '2025-09-10 02:48:24.165347', 'step': 11103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:24.217994', 'step': 11103, 'epoch': 2} {'type': 'loss', 'content': 0.1145487129688263, 'timestamp': '2025-09-10 02:48:24.223931', 'step': 11104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:24.277424', 'step': 11104, 'epoch': 2} {'type': 'loss', 'content': 0.10016200691461563, 'timestamp': '2025-09-10 02:48:24.279351', 'step': 11105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:24.332493', 'step': 11105, 'epoch': 2} {'type': 'loss', 'content': 0.1062641367316246, 'timestamp': '2025-09-10 02:48:24.334375', 'step': 11106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:24.387580', 'step': 11106, 'epoch': 2} {'type': 'loss', 'content': 0.0791945531964302, 'timestamp': '2025-09-10 02:48:24.389605', 'step': 11107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:24.443082', 'step': 11107, 'epoch': 2} {'type': 'loss', 'content': 0.19382569193840027, 'timestamp': '2025-09-10 02:48:24.448874', 'step': 11108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:24.502002', 'step': 11108, 'epoch': 2} {'type': 'loss', 'content': 0.1346246898174286, 'timestamp': '2025-09-10 02:48:24.503992', 'step': 11109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:24.557028', 'step': 11109, 'epoch': 2} {'type': 'loss', 'content': 0.2640514075756073, 'timestamp': '2025-09-10 02:48:24.559015', 'step': 11110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:24.613228', 'step': 11110, 'epoch': 2} {'type': 'loss', 'content': 0.1165093258023262, 'timestamp': '2025-09-10 02:48:24.615200', 'step': 11111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:48:24.676535', 'step': 11111, 'epoch': 2} {'type': 'loss', 'content': 0.09746523946523666, 'timestamp': '2025-09-10 02:48:24.687869', 'step': 11112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:24.740254', 'step': 11112, 'epoch': 2} {'type': 'loss', 'content': 0.07603023201227188, 'timestamp': '2025-09-10 02:48:24.742190', 'step': 11113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:24.795189', 'step': 11113, 'epoch': 2} {'type': 'loss', 'content': 0.12035445868968964, 'timestamp': '2025-09-10 02:48:24.797309', 'step': 11114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:24.852236', 'step': 11114, 'epoch': 2} {'type': 'loss', 'content': 0.06146113574504852, 'timestamp': '2025-09-10 02:48:24.854360', 'step': 11115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:24.910756', 'step': 11115, 'epoch': 2} {'type': 'loss', 'content': 0.12428990006446838, 'timestamp': '2025-09-10 02:48:24.916639', 'step': 11116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:24.970654', 'step': 11116, 'epoch': 2} {'type': 'loss', 'content': 0.1517385095357895, 'timestamp': '2025-09-10 02:48:24.972578', 'step': 11117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:25.029068', 'step': 11117, 'epoch': 2} {'type': 'loss', 'content': 0.09346570819616318, 'timestamp': '2025-09-10 02:48:25.031056', 'step': 11118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:25.085833', 'step': 11118, 'epoch': 2} {'type': 'loss', 'content': 0.08770481497049332, 'timestamp': '2025-09-10 02:48:25.087702', 'step': 11119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:25.152352', 'step': 11119, 'epoch': 2} {'type': 'loss', 'content': 0.24918000400066376, 'timestamp': '2025-09-10 02:48:25.158444', 'step': 11120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:25.214364', 'step': 11120, 'epoch': 2} {'type': 'loss', 'content': 0.13796791434288025, 'timestamp': '2025-09-10 02:48:25.216276', 'step': 11121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:25.270538', 'step': 11121, 'epoch': 2} {'type': 'loss', 'content': 0.16139982640743256, 'timestamp': '2025-09-10 02:48:25.272649', 'step': 11122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:25.328865', 'step': 11122, 'epoch': 2} {'type': 'loss', 'content': 0.22009406983852386, 'timestamp': '2025-09-10 02:48:25.330878', 'step': 11123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:25.386997', 'step': 11123, 'epoch': 2} {'type': 'loss', 'content': 0.12729907035827637, 'timestamp': '2025-09-10 02:48:25.393025', 'step': 11124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:25.446174', 'step': 11124, 'epoch': 2} {'type': 'loss', 'content': 0.11813264340162277, 'timestamp': '2025-09-10 02:48:25.449479', 'step': 11125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:25.504930', 'step': 11125, 'epoch': 2} {'type': 'loss', 'content': 0.16688282787799835, 'timestamp': '2025-09-10 02:48:25.507028', 'step': 11126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:25.561500', 'step': 11126, 'epoch': 2} {'type': 'loss', 'content': 0.17804038524627686, 'timestamp': '2025-09-10 02:48:25.563562', 'step': 11127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:25.617317', 'step': 11127, 'epoch': 2} {'type': 'loss', 'content': 0.1008012518286705, 'timestamp': '2025-09-10 02:48:25.623579', 'step': 11128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:25.680503', 'step': 11128, 'epoch': 2} {'type': 'loss', 'content': 0.07016484439373016, 'timestamp': '2025-09-10 02:48:25.682593', 'step': 11129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:25.737176', 'step': 11129, 'epoch': 2} {'type': 'loss', 'content': 0.13610464334487915, 'timestamp': '2025-09-10 02:48:25.739486', 'step': 11130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:25.793875', 'step': 11130, 'epoch': 2} {'type': 'loss', 'content': 0.08607390522956848, 'timestamp': '2025-09-10 02:48:25.795916', 'step': 11131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:25.852522', 'step': 11131, 'epoch': 2} {'type': 'loss', 'content': 0.15125425159931183, 'timestamp': '2025-09-10 02:48:25.858708', 'step': 11132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:25.916113', 'step': 11132, 'epoch': 2} {'type': 'loss', 'content': 0.09447892010211945, 'timestamp': '2025-09-10 02:48:25.918063', 'step': 11133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:25.971071', 'step': 11133, 'epoch': 2} {'type': 'loss', 'content': 0.12627579271793365, 'timestamp': '2025-09-10 02:48:25.973010', 'step': 11134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:26.031374', 'step': 11134, 'epoch': 2} {'type': 'loss', 'content': 0.2474786639213562, 'timestamp': '2025-09-10 02:48:26.034217', 'step': 11135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:26.090944', 'step': 11135, 'epoch': 2} {'type': 'loss', 'content': 0.0404864065349102, 'timestamp': '2025-09-10 02:48:26.096951', 'step': 11136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:26.150346', 'step': 11136, 'epoch': 2} {'type': 'loss', 'content': 0.12279444932937622, 'timestamp': '2025-09-10 02:48:26.152297', 'step': 11137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:26.206604', 'step': 11137, 'epoch': 2} {'type': 'loss', 'content': 0.13044720888137817, 'timestamp': '2025-09-10 02:48:26.208571', 'step': 11138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:26.261620', 'step': 11138, 'epoch': 2} {'type': 'loss', 'content': 0.1292072832584381, 'timestamp': '2025-09-10 02:48:26.264104', 'step': 11139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:26.318993', 'step': 11139, 'epoch': 2} {'type': 'loss', 'content': 0.1696714460849762, 'timestamp': '2025-09-10 02:48:26.324906', 'step': 11140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:26.377377', 'step': 11140, 'epoch': 2} {'type': 'loss', 'content': 0.11418198049068451, 'timestamp': '2025-09-10 02:48:26.379505', 'step': 11141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:26.432051', 'step': 11141, 'epoch': 2} {'type': 'loss', 'content': 0.07595225423574448, 'timestamp': '2025-09-10 02:48:26.434006', 'step': 11142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:26.489791', 'step': 11142, 'epoch': 2} {'type': 'loss', 'content': 0.13871707022190094, 'timestamp': '2025-09-10 02:48:26.491937', 'step': 11143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:26.547933', 'step': 11143, 'epoch': 2} {'type': 'loss', 'content': 0.11346912384033203, 'timestamp': '2025-09-10 02:48:26.554024', 'step': 11144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:26.607271', 'step': 11144, 'epoch': 2} {'type': 'loss', 'content': 0.10099756717681885, 'timestamp': '2025-09-10 02:48:26.609305', 'step': 11145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:26.665583', 'step': 11145, 'epoch': 2} {'type': 'loss', 'content': 0.28200563788414, 'timestamp': '2025-09-10 02:48:26.667510', 'step': 11146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:26.723227', 'step': 11146, 'epoch': 2} {'type': 'loss', 'content': 0.13061319291591644, 'timestamp': '2025-09-10 02:48:26.725258', 'step': 11147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:26.778527', 'step': 11147, 'epoch': 2} {'type': 'loss', 'content': 0.11560441553592682, 'timestamp': '2025-09-10 02:48:26.784500', 'step': 11148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:26.837299', 'step': 11148, 'epoch': 2} {'type': 'loss', 'content': 0.14939545094966888, 'timestamp': '2025-09-10 02:48:26.839299', 'step': 11149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:26.891926', 'step': 11149, 'epoch': 2} {'type': 'loss', 'content': 0.16618946194648743, 'timestamp': '2025-09-10 02:48:26.894036', 'step': 11150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:26.949198', 'step': 11150, 'epoch': 2} {'type': 'loss', 'content': 0.15908858180046082, 'timestamp': '2025-09-10 02:48:26.951108', 'step': 11151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:27.005291', 'step': 11151, 'epoch': 2} {'type': 'loss', 'content': 0.1159651055932045, 'timestamp': '2025-09-10 02:48:27.011335', 'step': 11152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:27.064471', 'step': 11152, 'epoch': 2} {'type': 'loss', 'content': 0.10028474777936935, 'timestamp': '2025-09-10 02:48:27.066526', 'step': 11153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:27.122242', 'step': 11153, 'epoch': 2} {'type': 'loss', 'content': 0.19207341969013214, 'timestamp': '2025-09-10 02:48:27.124214', 'step': 11154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:27.181372', 'step': 11154, 'epoch': 2} {'type': 'loss', 'content': 0.22878040373325348, 'timestamp': '2025-09-10 02:48:27.183398', 'step': 11155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:27.237700', 'step': 11155, 'epoch': 2} {'type': 'loss', 'content': 0.07503201812505722, 'timestamp': '2025-09-10 02:48:27.243825', 'step': 11156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:27.299734', 'step': 11156, 'epoch': 2} {'type': 'loss', 'content': 0.03708907216787338, 'timestamp': '2025-09-10 02:48:27.301898', 'step': 11157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:27.356205', 'step': 11157, 'epoch': 2} {'type': 'loss', 'content': 0.13215027749538422, 'timestamp': '2025-09-10 02:48:27.358353', 'step': 11158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:27.412825', 'step': 11158, 'epoch': 2} {'type': 'loss', 'content': 0.0726136639714241, 'timestamp': '2025-09-10 02:48:27.414884', 'step': 11159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:27.470183', 'step': 11159, 'epoch': 2} {'type': 'loss', 'content': 0.05821596086025238, 'timestamp': '2025-09-10 02:48:27.476257', 'step': 11160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:27.530207', 'step': 11160, 'epoch': 2} {'type': 'loss', 'content': 0.18750908970832825, 'timestamp': '2025-09-10 02:48:27.532260', 'step': 11161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:27.591555', 'step': 11161, 'epoch': 2} {'type': 'loss', 'content': 0.1134667694568634, 'timestamp': '2025-09-10 02:48:27.593612', 'step': 11162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:27.647668', 'step': 11162, 'epoch': 2} {'type': 'loss', 'content': 0.1232096403837204, 'timestamp': '2025-09-10 02:48:27.649683', 'step': 11163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:27.707675', 'step': 11163, 'epoch': 2} {'type': 'loss', 'content': 0.1818804144859314, 'timestamp': '2025-09-10 02:48:27.713803', 'step': 11164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:27.766965', 'step': 11164, 'epoch': 2} {'type': 'loss', 'content': 0.13639643788337708, 'timestamp': '2025-09-10 02:48:27.768830', 'step': 11165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:27.822536', 'step': 11165, 'epoch': 2} {'type': 'loss', 'content': 0.09420878440141678, 'timestamp': '2025-09-10 02:48:27.824786', 'step': 11166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:27.880055', 'step': 11166, 'epoch': 2} {'type': 'loss', 'content': 0.21370892226696014, 'timestamp': '2025-09-10 02:48:27.882306', 'step': 11167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:27.938527', 'step': 11167, 'epoch': 2} {'type': 'loss', 'content': 0.17143064737319946, 'timestamp': '2025-09-10 02:48:27.945202', 'step': 11168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:27.998823', 'step': 11168, 'epoch': 2} {'type': 'loss', 'content': 0.14849451184272766, 'timestamp': '2025-09-10 02:48:28.001047', 'step': 11169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:28.054191', 'step': 11169, 'epoch': 2} {'type': 'loss', 'content': 0.07738931477069855, 'timestamp': '2025-09-10 02:48:28.056415', 'step': 11170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:28.109330', 'step': 11170, 'epoch': 2} {'type': 'loss', 'content': 0.15881821513175964, 'timestamp': '2025-09-10 02:48:28.111533', 'step': 11171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:28.164196', 'step': 11171, 'epoch': 2} {'type': 'loss', 'content': 0.08247003704309464, 'timestamp': '2025-09-10 02:48:28.170058', 'step': 11172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:28.224392', 'step': 11172, 'epoch': 2} {'type': 'loss', 'content': 0.1922796368598938, 'timestamp': '2025-09-10 02:48:28.226684', 'step': 11173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:28.279874', 'step': 11173, 'epoch': 2} {'type': 'loss', 'content': 0.16432656347751617, 'timestamp': '2025-09-10 02:48:28.282016', 'step': 11174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:28.334819', 'step': 11174, 'epoch': 2} {'type': 'loss', 'content': 0.18543167412281036, 'timestamp': '2025-09-10 02:48:28.337012', 'step': 11175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:28.389983', 'step': 11175, 'epoch': 2} {'type': 'loss', 'content': 0.23172929883003235, 'timestamp': '2025-09-10 02:48:28.396075', 'step': 11176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:28.449114', 'step': 11176, 'epoch': 2} {'type': 'loss', 'content': 0.08826762437820435, 'timestamp': '2025-09-10 02:48:28.451136', 'step': 11177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:28.511684', 'step': 11177, 'epoch': 2} {'type': 'loss', 'content': 0.10703763365745544, 'timestamp': '2025-09-10 02:48:28.513821', 'step': 11178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:28.567217', 'step': 11178, 'epoch': 2} {'type': 'loss', 'content': 0.1374818980693817, 'timestamp': '2025-09-10 02:48:28.569530', 'step': 11179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:28.623290', 'step': 11179, 'epoch': 2} {'type': 'loss', 'content': 0.10080523043870926, 'timestamp': '2025-09-10 02:48:28.629273', 'step': 11180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:28.681724', 'step': 11180, 'epoch': 2} {'type': 'loss', 'content': 0.1516042798757553, 'timestamp': '2025-09-10 02:48:28.683873', 'step': 11181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:28.738206', 'step': 11181, 'epoch': 2} {'type': 'loss', 'content': 0.04983261600136757, 'timestamp': '2025-09-10 02:48:28.740354', 'step': 11182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:28.794684', 'step': 11182, 'epoch': 2} {'type': 'loss', 'content': 0.17282655835151672, 'timestamp': '2025-09-10 02:48:28.796872', 'step': 11183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:28.850860', 'step': 11183, 'epoch': 2} {'type': 'loss', 'content': 0.16659364104270935, 'timestamp': '2025-09-10 02:48:28.857001', 'step': 11184, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:48:41.452886', 'step': 11184, 'epoch': 2} {'type': 'pplx', 'content': 13777.396183896732, 'timestamp': '2025-09-10 02:48:41.455929', 'step': 11184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:41.509322', 'step': 11184, 'epoch': 2} {'type': 'loss', 'content': 0.13087007403373718, 'timestamp': '2025-09-10 02:48:41.511302', 'step': 11185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:41.566739', 'step': 11185, 'epoch': 2} {'type': 'loss', 'content': 0.12841984629631042, 'timestamp': '2025-09-10 02:48:41.568755', 'step': 11186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:41.622404', 'step': 11186, 'epoch': 2} {'type': 'loss', 'content': 0.12823504209518433, 'timestamp': '2025-09-10 02:48:41.624344', 'step': 11187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:41.677196', 'step': 11187, 'epoch': 2} {'type': 'loss', 'content': 0.1191219761967659, 'timestamp': '2025-09-10 02:48:41.683096', 'step': 11188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:41.735000', 'step': 11188, 'epoch': 2} {'type': 'loss', 'content': 0.0805232971906662, 'timestamp': '2025-09-10 02:48:41.736797', 'step': 11189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:41.789383', 'step': 11189, 'epoch': 2} {'type': 'loss', 'content': 0.14863334596157074, 'timestamp': '2025-09-10 02:48:41.791434', 'step': 11190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:41.845123', 'step': 11190, 'epoch': 2} {'type': 'loss', 'content': 0.142916738986969, 'timestamp': '2025-09-10 02:48:41.847022', 'step': 11191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:41.900780', 'step': 11191, 'epoch': 2} {'type': 'loss', 'content': 0.21085409820079803, 'timestamp': '2025-09-10 02:48:41.906751', 'step': 11192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:41.959936', 'step': 11192, 'epoch': 2} {'type': 'loss', 'content': 0.16900865733623505, 'timestamp': '2025-09-10 02:48:41.961965', 'step': 11193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:42.014787', 'step': 11193, 'epoch': 2} {'type': 'loss', 'content': 0.15467636287212372, 'timestamp': '2025-09-10 02:48:42.016870', 'step': 11194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:42.069520', 'step': 11194, 'epoch': 2} {'type': 'loss', 'content': 0.12619705498218536, 'timestamp': '2025-09-10 02:48:42.071787', 'step': 11195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:42.125390', 'step': 11195, 'epoch': 2} {'type': 'loss', 'content': 0.16300243139266968, 'timestamp': '2025-09-10 02:48:42.131347', 'step': 11196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:42.183185', 'step': 11196, 'epoch': 2} {'type': 'loss', 'content': 0.124974325299263, 'timestamp': '2025-09-10 02:48:42.185247', 'step': 11197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:42.238053', 'step': 11197, 'epoch': 2} {'type': 'loss', 'content': 0.04006407409906387, 'timestamp': '2025-09-10 02:48:42.240337', 'step': 11198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:42.293638', 'step': 11198, 'epoch': 2} {'type': 'loss', 'content': 0.12407226860523224, 'timestamp': '2025-09-10 02:48:42.295957', 'step': 11199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:42.348977', 'step': 11199, 'epoch': 2} {'type': 'loss', 'content': 0.14638227224349976, 'timestamp': '2025-09-10 02:48:42.355326', 'step': 11200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:42.409887', 'step': 11200, 'epoch': 2} {'type': 'loss', 'content': 0.1480085253715515, 'timestamp': '2025-09-10 02:48:42.411659', 'step': 11201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:42.464710', 'step': 11201, 'epoch': 2} {'type': 'loss', 'content': 0.06284117698669434, 'timestamp': '2025-09-10 02:48:42.466868', 'step': 11202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:42.520262', 'step': 11202, 'epoch': 2} {'type': 'loss', 'content': 0.15277798473834991, 'timestamp': '2025-09-10 02:48:42.522528', 'step': 11203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:42.576500', 'step': 11203, 'epoch': 2} {'type': 'loss', 'content': 0.1563076376914978, 'timestamp': '2025-09-10 02:48:42.582431', 'step': 11204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:42.635437', 'step': 11204, 'epoch': 2} {'type': 'loss', 'content': 0.11653623729944229, 'timestamp': '2025-09-10 02:48:42.637582', 'step': 11205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:42.690984', 'step': 11205, 'epoch': 2} {'type': 'loss', 'content': 0.1010870710015297, 'timestamp': '2025-09-10 02:48:42.692992', 'step': 11206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:42.746547', 'step': 11206, 'epoch': 2} {'type': 'loss', 'content': 0.10875478386878967, 'timestamp': '2025-09-10 02:48:42.748401', 'step': 11207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:42.801796', 'step': 11207, 'epoch': 2} {'type': 'loss', 'content': 0.11122650653123856, 'timestamp': '2025-09-10 02:48:42.807569', 'step': 11208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:42.859607', 'step': 11208, 'epoch': 2} {'type': 'loss', 'content': 0.16497762501239777, 'timestamp': '2025-09-10 02:48:42.861798', 'step': 11209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:42.914887', 'step': 11209, 'epoch': 2} {'type': 'loss', 'content': 0.13016235828399658, 'timestamp': '2025-09-10 02:48:42.917230', 'step': 11210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:42.972138', 'step': 11210, 'epoch': 2} {'type': 'loss', 'content': 0.13771522045135498, 'timestamp': '2025-09-10 02:48:42.974076', 'step': 11211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:43.026791', 'step': 11211, 'epoch': 2} {'type': 'loss', 'content': 0.05750042572617531, 'timestamp': '2025-09-10 02:48:43.032713', 'step': 11212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:43.085236', 'step': 11212, 'epoch': 2} {'type': 'loss', 'content': 0.16736888885498047, 'timestamp': '2025-09-10 02:48:43.087406', 'step': 11213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:43.140070', 'step': 11213, 'epoch': 2} {'type': 'loss', 'content': 0.09993962943553925, 'timestamp': '2025-09-10 02:48:43.142316', 'step': 11214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:43.195518', 'step': 11214, 'epoch': 2} {'type': 'loss', 'content': 0.1041659489274025, 'timestamp': '2025-09-10 02:48:43.197605', 'step': 11215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:43.251154', 'step': 11215, 'epoch': 2} {'type': 'loss', 'content': 0.11783624440431595, 'timestamp': '2025-09-10 02:48:43.256865', 'step': 11216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:43.309049', 'step': 11216, 'epoch': 2} {'type': 'loss', 'content': 0.11421869695186615, 'timestamp': '2025-09-10 02:48:43.310998', 'step': 11217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:43.365435', 'step': 11217, 'epoch': 2} {'type': 'loss', 'content': 0.21572257578372955, 'timestamp': '2025-09-10 02:48:43.367725', 'step': 11218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:43.421850', 'step': 11218, 'epoch': 2} {'type': 'loss', 'content': 0.06312011927366257, 'timestamp': '2025-09-10 02:48:43.423851', 'step': 11219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:43.478555', 'step': 11219, 'epoch': 2} {'type': 'loss', 'content': 0.057628292590379715, 'timestamp': '2025-09-10 02:48:43.484741', 'step': 11220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:43.537893', 'step': 11220, 'epoch': 2} {'type': 'loss', 'content': 0.10132943838834763, 'timestamp': '2025-09-10 02:48:43.540004', 'step': 11221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:43.596147', 'step': 11221, 'epoch': 2} {'type': 'loss', 'content': 0.10660490393638611, 'timestamp': '2025-09-10 02:48:43.598375', 'step': 11222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:43.652516', 'step': 11222, 'epoch': 2} {'type': 'loss', 'content': 0.10097146779298782, 'timestamp': '2025-09-10 02:48:43.654822', 'step': 11223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:43.712765', 'step': 11223, 'epoch': 2} {'type': 'loss', 'content': 0.09676162898540497, 'timestamp': '2025-09-10 02:48:43.718892', 'step': 11224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:43.777762', 'step': 11224, 'epoch': 2} {'type': 'loss', 'content': 0.05742936581373215, 'timestamp': '2025-09-10 02:48:43.780140', 'step': 11225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:43.833898', 'step': 11225, 'epoch': 2} {'type': 'loss', 'content': 0.0575786717236042, 'timestamp': '2025-09-10 02:48:43.836274', 'step': 11226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:43.898864', 'step': 11226, 'epoch': 2} {'type': 'loss', 'content': 0.08810397237539291, 'timestamp': '2025-09-10 02:48:43.901186', 'step': 11227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:43.958155', 'step': 11227, 'epoch': 2} {'type': 'loss', 'content': 0.128217414021492, 'timestamp': '2025-09-10 02:48:43.964696', 'step': 11228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:44.021398', 'step': 11228, 'epoch': 2} {'type': 'loss', 'content': 0.1416328400373459, 'timestamp': '2025-09-10 02:48:44.023676', 'step': 11229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:44.077669', 'step': 11229, 'epoch': 2} {'type': 'loss', 'content': 0.10608139634132385, 'timestamp': '2025-09-10 02:48:44.079954', 'step': 11230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:44.133718', 'step': 11230, 'epoch': 2} {'type': 'loss', 'content': 0.08717651665210724, 'timestamp': '2025-09-10 02:48:44.136080', 'step': 11231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:44.190835', 'step': 11231, 'epoch': 2} {'type': 'loss', 'content': 0.10772943496704102, 'timestamp': '2025-09-10 02:48:44.197066', 'step': 11232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:44.250842', 'step': 11232, 'epoch': 2} {'type': 'loss', 'content': 0.11891534179449081, 'timestamp': '2025-09-10 02:48:44.253057', 'step': 11233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:44.306060', 'step': 11233, 'epoch': 2} {'type': 'loss', 'content': 0.17097656428813934, 'timestamp': '2025-09-10 02:48:44.308193', 'step': 11234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:44.360862', 'step': 11234, 'epoch': 2} {'type': 'loss', 'content': 0.03386947512626648, 'timestamp': '2025-09-10 02:48:44.362981', 'step': 11235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:44.416613', 'step': 11235, 'epoch': 2} {'type': 'loss', 'content': 0.03969211503863335, 'timestamp': '2025-09-10 02:48:44.422482', 'step': 11236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:44.480994', 'step': 11236, 'epoch': 2} {'type': 'loss', 'content': 0.10352461040019989, 'timestamp': '2025-09-10 02:48:44.483301', 'step': 11237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:44.538798', 'step': 11237, 'epoch': 2} {'type': 'loss', 'content': 0.10775972902774811, 'timestamp': '2025-09-10 02:48:44.541057', 'step': 11238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:44.595049', 'step': 11238, 'epoch': 2} {'type': 'loss', 'content': 0.07377423346042633, 'timestamp': '2025-09-10 02:48:44.597501', 'step': 11239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:44.650298', 'step': 11239, 'epoch': 2} {'type': 'loss', 'content': 0.1081225797533989, 'timestamp': '2025-09-10 02:48:44.656537', 'step': 11240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:44.709801', 'step': 11240, 'epoch': 2} {'type': 'loss', 'content': 0.10174477100372314, 'timestamp': '2025-09-10 02:48:44.712412', 'step': 11241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:44.766343', 'step': 11241, 'epoch': 2} {'type': 'loss', 'content': 0.06412926316261292, 'timestamp': '2025-09-10 02:48:44.768864', 'step': 11242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:44.822606', 'step': 11242, 'epoch': 2} {'type': 'loss', 'content': 0.11864157021045685, 'timestamp': '2025-09-10 02:48:44.825111', 'step': 11243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:44.883028', 'step': 11243, 'epoch': 2} {'type': 'loss', 'content': 0.0989830419421196, 'timestamp': '2025-09-10 02:48:44.889198', 'step': 11244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:44.949550', 'step': 11244, 'epoch': 2} {'type': 'loss', 'content': 0.21013815701007843, 'timestamp': '2025-09-10 02:48:44.951890', 'step': 11245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:45.005670', 'step': 11245, 'epoch': 2} {'type': 'loss', 'content': 0.0635705441236496, 'timestamp': '2025-09-10 02:48:45.007914', 'step': 11246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:45.062813', 'step': 11246, 'epoch': 2} {'type': 'loss', 'content': 0.15861845016479492, 'timestamp': '2025-09-10 02:48:45.065177', 'step': 11247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:45.119255', 'step': 11247, 'epoch': 2} {'type': 'loss', 'content': 0.14775565266609192, 'timestamp': '2025-09-10 02:48:45.125578', 'step': 11248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:45.178478', 'step': 11248, 'epoch': 2} {'type': 'loss', 'content': 0.1688154637813568, 'timestamp': '2025-09-10 02:48:45.180739', 'step': 11249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:45.234619', 'step': 11249, 'epoch': 2} {'type': 'loss', 'content': 0.10657619684934616, 'timestamp': '2025-09-10 02:48:45.236870', 'step': 11250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:45.290464', 'step': 11250, 'epoch': 2} {'type': 'loss', 'content': 0.14401349425315857, 'timestamp': '2025-09-10 02:48:45.292568', 'step': 11251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:45.345335', 'step': 11251, 'epoch': 2} {'type': 'loss', 'content': 0.0899084061384201, 'timestamp': '2025-09-10 02:48:45.351510', 'step': 11252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:45.403575', 'step': 11252, 'epoch': 2} {'type': 'loss', 'content': 0.17891372740268707, 'timestamp': '2025-09-10 02:48:45.405806', 'step': 11253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:45.458711', 'step': 11253, 'epoch': 2} {'type': 'loss', 'content': 0.16209295392036438, 'timestamp': '2025-09-10 02:48:45.460927', 'step': 11254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:45.514144', 'step': 11254, 'epoch': 2} {'type': 'loss', 'content': 0.15077605843544006, 'timestamp': '2025-09-10 02:48:45.516251', 'step': 11255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:45.569273', 'step': 11255, 'epoch': 2} {'type': 'loss', 'content': 0.12433794140815735, 'timestamp': '2025-09-10 02:48:45.575350', 'step': 11256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:45.628511', 'step': 11256, 'epoch': 2} {'type': 'loss', 'content': 0.04541873186826706, 'timestamp': '2025-09-10 02:48:45.630656', 'step': 11257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:45.683781', 'step': 11257, 'epoch': 2} {'type': 'loss', 'content': 0.09380988776683807, 'timestamp': '2025-09-10 02:48:45.686053', 'step': 11258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:45.739347', 'step': 11258, 'epoch': 2} {'type': 'loss', 'content': 0.21268245577812195, 'timestamp': '2025-09-10 02:48:45.741457', 'step': 11259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:45.794547', 'step': 11259, 'epoch': 2} {'type': 'loss', 'content': 0.048902738839387894, 'timestamp': '2025-09-10 02:48:45.800514', 'step': 11260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:45.853098', 'step': 11260, 'epoch': 2} {'type': 'loss', 'content': 0.07401230186223984, 'timestamp': '2025-09-10 02:48:45.855413', 'step': 11261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:45.908763', 'step': 11261, 'epoch': 2} {'type': 'loss', 'content': 0.09264934062957764, 'timestamp': '2025-09-10 02:48:45.910860', 'step': 11262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:45.963756', 'step': 11262, 'epoch': 2} {'type': 'loss', 'content': 0.08888264745473862, 'timestamp': '2025-09-10 02:48:45.965942', 'step': 11263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:46.018988', 'step': 11263, 'epoch': 2} {'type': 'loss', 'content': 0.15311199426651, 'timestamp': '2025-09-10 02:48:46.024809', 'step': 11264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:46.076800', 'step': 11264, 'epoch': 2} {'type': 'loss', 'content': 0.06903044879436493, 'timestamp': '2025-09-10 02:48:46.078995', 'step': 11265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:46.133454', 'step': 11265, 'epoch': 2} {'type': 'loss', 'content': 0.07366614043712616, 'timestamp': '2025-09-10 02:48:46.135612', 'step': 11266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:46.188834', 'step': 11266, 'epoch': 2} {'type': 'loss', 'content': 0.148286372423172, 'timestamp': '2025-09-10 02:48:46.191133', 'step': 11267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:46.244438', 'step': 11267, 'epoch': 2} {'type': 'loss', 'content': 0.03980354219675064, 'timestamp': '2025-09-10 02:48:46.250494', 'step': 11268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:46.303808', 'step': 11268, 'epoch': 2} {'type': 'loss', 'content': 0.16319423913955688, 'timestamp': '2025-09-10 02:48:46.305888', 'step': 11269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:46.359542', 'step': 11269, 'epoch': 2} {'type': 'loss', 'content': 0.12103593349456787, 'timestamp': '2025-09-10 02:48:46.361572', 'step': 11270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:46.414649', 'step': 11270, 'epoch': 2} {'type': 'loss', 'content': 0.08687970042228699, 'timestamp': '2025-09-10 02:48:46.416793', 'step': 11271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:46.469614', 'step': 11271, 'epoch': 2} {'type': 'loss', 'content': 0.1705489605665207, 'timestamp': '2025-09-10 02:48:46.475683', 'step': 11272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:46.530555', 'step': 11272, 'epoch': 2} {'type': 'loss', 'content': 0.15510070323944092, 'timestamp': '2025-09-10 02:48:46.532582', 'step': 11273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:46.585584', 'step': 11273, 'epoch': 2} {'type': 'loss', 'content': 0.04760975018143654, 'timestamp': '2025-09-10 02:48:46.587858', 'step': 11274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:46.641556', 'step': 11274, 'epoch': 2} {'type': 'loss', 'content': 0.10171442478895187, 'timestamp': '2025-09-10 02:48:46.643660', 'step': 11275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:46.697926', 'step': 11275, 'epoch': 2} {'type': 'loss', 'content': 0.1541898250579834, 'timestamp': '2025-09-10 02:48:46.704020', 'step': 11276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:46.756606', 'step': 11276, 'epoch': 2} {'type': 'loss', 'content': 0.09661959856748581, 'timestamp': '2025-09-10 02:48:46.758946', 'step': 11277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:46.812819', 'step': 11277, 'epoch': 2} {'type': 'loss', 'content': 0.032377663999795914, 'timestamp': '2025-09-10 02:48:46.814895', 'step': 11278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:46.868396', 'step': 11278, 'epoch': 2} {'type': 'loss', 'content': 0.17204642295837402, 'timestamp': '2025-09-10 02:48:46.870506', 'step': 11279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:46.924508', 'step': 11279, 'epoch': 2} {'type': 'loss', 'content': 0.12698866426944733, 'timestamp': '2025-09-10 02:48:46.930428', 'step': 11280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:46.982767', 'step': 11280, 'epoch': 2} {'type': 'loss', 'content': 0.061698053032159805, 'timestamp': '2025-09-10 02:48:46.985132', 'step': 11281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:47.039742', 'step': 11281, 'epoch': 2} {'type': 'loss', 'content': 0.16503866016864777, 'timestamp': '2025-09-10 02:48:47.042195', 'step': 11282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:47.095106', 'step': 11282, 'epoch': 2} {'type': 'loss', 'content': 0.05067992955446243, 'timestamp': '2025-09-10 02:48:47.097552', 'step': 11283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:47.151305', 'step': 11283, 'epoch': 2} {'type': 'loss', 'content': 0.16379432380199432, 'timestamp': '2025-09-10 02:48:47.157694', 'step': 11284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:47.211555', 'step': 11284, 'epoch': 2} {'type': 'loss', 'content': 0.08522234112024307, 'timestamp': '2025-09-10 02:48:47.213891', 'step': 11285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:47.266712', 'step': 11285, 'epoch': 2} {'type': 'loss', 'content': 0.1967453807592392, 'timestamp': '2025-09-10 02:48:47.269077', 'step': 11286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:47.322551', 'step': 11286, 'epoch': 2} {'type': 'loss', 'content': 0.07614114135503769, 'timestamp': '2025-09-10 02:48:47.324861', 'step': 11287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:47.377866', 'step': 11287, 'epoch': 2} {'type': 'loss', 'content': 0.1679745763540268, 'timestamp': '2025-09-10 02:48:47.384346', 'step': 11288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:47.437523', 'step': 11288, 'epoch': 2} {'type': 'loss', 'content': 0.0947430357336998, 'timestamp': '2025-09-10 02:48:47.439847', 'step': 11289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:47.492310', 'step': 11289, 'epoch': 2} {'type': 'loss', 'content': 0.09443411231040955, 'timestamp': '2025-09-10 02:48:47.494353', 'step': 11290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:47.547107', 'step': 11290, 'epoch': 2} {'type': 'loss', 'content': 0.09055852890014648, 'timestamp': '2025-09-10 02:48:47.549371', 'step': 11291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:47.601622', 'step': 11291, 'epoch': 2} {'type': 'loss', 'content': 0.13949602842330933, 'timestamp': '2025-09-10 02:48:47.607562', 'step': 11292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:47.660489', 'step': 11292, 'epoch': 2} {'type': 'loss', 'content': 0.16611002385616302, 'timestamp': '2025-09-10 02:48:47.662808', 'step': 11293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:47.715675', 'step': 11293, 'epoch': 2} {'type': 'loss', 'content': 0.157567098736763, 'timestamp': '2025-09-10 02:48:47.717802', 'step': 11294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:47.771235', 'step': 11294, 'epoch': 2} {'type': 'loss', 'content': 0.15912330150604248, 'timestamp': '2025-09-10 02:48:47.773546', 'step': 11295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:47.826111', 'step': 11295, 'epoch': 2} {'type': 'loss', 'content': 0.11776214838027954, 'timestamp': '2025-09-10 02:48:47.832152', 'step': 11296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:47.883936', 'step': 11296, 'epoch': 2} {'type': 'loss', 'content': 0.1595769077539444, 'timestamp': '2025-09-10 02:48:47.886377', 'step': 11297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:47.940751', 'step': 11297, 'epoch': 2} {'type': 'loss', 'content': 0.08300887793302536, 'timestamp': '2025-09-10 02:48:47.942837', 'step': 11298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:47.995211', 'step': 11298, 'epoch': 2} {'type': 'loss', 'content': 0.12216494977474213, 'timestamp': '2025-09-10 02:48:47.997491', 'step': 11299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:48.051491', 'step': 11299, 'epoch': 2} {'type': 'loss', 'content': 0.1075253039598465, 'timestamp': '2025-09-10 02:48:48.057424', 'step': 11300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:48.109647', 'step': 11300, 'epoch': 2} {'type': 'loss', 'content': 0.1535254567861557, 'timestamp': '2025-09-10 02:48:48.111701', 'step': 11301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:48.164622', 'step': 11301, 'epoch': 2} {'type': 'loss', 'content': 0.12564267218112946, 'timestamp': '2025-09-10 02:48:48.166926', 'step': 11302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:48.220259', 'step': 11302, 'epoch': 2} {'type': 'loss', 'content': 0.16519314050674438, 'timestamp': '2025-09-10 02:48:48.222367', 'step': 11303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:48.275770', 'step': 11303, 'epoch': 2} {'type': 'loss', 'content': 0.15179114043712616, 'timestamp': '2025-09-10 02:48:48.281805', 'step': 11304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:48.337386', 'step': 11304, 'epoch': 2} {'type': 'loss', 'content': 0.22134137153625488, 'timestamp': '2025-09-10 02:48:48.339773', 'step': 11305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:48.394715', 'step': 11305, 'epoch': 2} {'type': 'loss', 'content': 0.12360572814941406, 'timestamp': '2025-09-10 02:48:48.397019', 'step': 11306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:48.455670', 'step': 11306, 'epoch': 2} {'type': 'loss', 'content': 0.21820783615112305, 'timestamp': '2025-09-10 02:48:48.463803', 'step': 11307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:48.520502', 'step': 11307, 'epoch': 2} {'type': 'loss', 'content': 0.19906659424304962, 'timestamp': '2025-09-10 02:48:48.526281', 'step': 11308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:48.578794', 'step': 11308, 'epoch': 2} {'type': 'loss', 'content': 0.17128410935401917, 'timestamp': '2025-09-10 02:48:48.581093', 'step': 11309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:48.636976', 'step': 11309, 'epoch': 2} {'type': 'loss', 'content': 0.12079664319753647, 'timestamp': '2025-09-10 02:48:48.640386', 'step': 11310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:48.700169', 'step': 11310, 'epoch': 2} {'type': 'loss', 'content': 0.14652740955352783, 'timestamp': '2025-09-10 02:48:48.702420', 'step': 11311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:48.756980', 'step': 11311, 'epoch': 2} {'type': 'loss', 'content': 0.11197109520435333, 'timestamp': '2025-09-10 02:48:48.765302', 'step': 11312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:48.824551', 'step': 11312, 'epoch': 2} {'type': 'loss', 'content': 0.14525695145130157, 'timestamp': '2025-09-10 02:48:48.828098', 'step': 11313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:48.886996', 'step': 11313, 'epoch': 2} {'type': 'loss', 'content': 0.12261693924665451, 'timestamp': '2025-09-10 02:48:48.889131', 'step': 11314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:48.942045', 'step': 11314, 'epoch': 2} {'type': 'loss', 'content': 0.12611748278141022, 'timestamp': '2025-09-10 02:48:48.944124', 'step': 11315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:48.997215', 'step': 11315, 'epoch': 2} {'type': 'loss', 'content': 0.1666915863752365, 'timestamp': '2025-09-10 02:48:49.003221', 'step': 11316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:49.063742', 'step': 11316, 'epoch': 2} {'type': 'loss', 'content': 0.1317644864320755, 'timestamp': '2025-09-10 02:48:49.066015', 'step': 11317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:49.119821', 'step': 11317, 'epoch': 2} {'type': 'loss', 'content': 0.11180984228849411, 'timestamp': '2025-09-10 02:48:49.121957', 'step': 11318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:49.175258', 'step': 11318, 'epoch': 2} {'type': 'loss', 'content': 0.09916216135025024, 'timestamp': '2025-09-10 02:48:49.177559', 'step': 11319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:49.231575', 'step': 11319, 'epoch': 2} {'type': 'loss', 'content': 0.07759933173656464, 'timestamp': '2025-09-10 02:48:49.237418', 'step': 11320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:49.290848', 'step': 11320, 'epoch': 2} {'type': 'loss', 'content': 0.18104997277259827, 'timestamp': '2025-09-10 02:48:49.297394', 'step': 11321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:49.362647', 'step': 11321, 'epoch': 2} {'type': 'loss', 'content': 0.11805478483438492, 'timestamp': '2025-09-10 02:48:49.365047', 'step': 11322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:49.419800', 'step': 11322, 'epoch': 2} {'type': 'loss', 'content': 0.13058757781982422, 'timestamp': '2025-09-10 02:48:49.422121', 'step': 11323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:49.476097', 'step': 11323, 'epoch': 2} {'type': 'loss', 'content': 0.0577523447573185, 'timestamp': '2025-09-10 02:48:49.482468', 'step': 11324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:49.537305', 'step': 11324, 'epoch': 2} {'type': 'loss', 'content': 0.20356576144695282, 'timestamp': '2025-09-10 02:48:49.539652', 'step': 11325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:49.592192', 'step': 11325, 'epoch': 2} {'type': 'loss', 'content': 0.21238775551319122, 'timestamp': '2025-09-10 02:48:49.595681', 'step': 11326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:49.650183', 'step': 11326, 'epoch': 2} {'type': 'loss', 'content': 0.08284395188093185, 'timestamp': '2025-09-10 02:48:49.652334', 'step': 11327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:49.705737', 'step': 11327, 'epoch': 2} {'type': 'loss', 'content': 0.05172521248459816, 'timestamp': '2025-09-10 02:48:49.711709', 'step': 11328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:49.764418', 'step': 11328, 'epoch': 2} {'type': 'loss', 'content': 0.0858101025223732, 'timestamp': '2025-09-10 02:48:49.766691', 'step': 11329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:49.822611', 'step': 11329, 'epoch': 2} {'type': 'loss', 'content': 0.1478293240070343, 'timestamp': '2025-09-10 02:48:49.824693', 'step': 11330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:49.878508', 'step': 11330, 'epoch': 2} {'type': 'loss', 'content': 0.047002051025629044, 'timestamp': '2025-09-10 02:48:49.880677', 'step': 11331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:49.934296', 'step': 11331, 'epoch': 2} {'type': 'loss', 'content': 0.23026904463768005, 'timestamp': '2025-09-10 02:48:49.940689', 'step': 11332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:49.995494', 'step': 11332, 'epoch': 2} {'type': 'loss', 'content': 0.212746262550354, 'timestamp': '2025-09-10 02:48:49.997687', 'step': 11333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:50.049924', 'step': 11333, 'epoch': 2} {'type': 'loss', 'content': 0.10345149040222168, 'timestamp': '2025-09-10 02:48:50.052010', 'step': 11334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:50.104294', 'step': 11334, 'epoch': 2} {'type': 'loss', 'content': 0.062456872314214706, 'timestamp': '2025-09-10 02:48:50.106635', 'step': 11335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:50.160168', 'step': 11335, 'epoch': 2} {'type': 'loss', 'content': 0.0755881816148758, 'timestamp': '2025-09-10 02:48:50.166267', 'step': 11336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:50.219310', 'step': 11336, 'epoch': 2} {'type': 'loss', 'content': 0.07584622502326965, 'timestamp': '2025-09-10 02:48:50.221764', 'step': 11337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:50.275143', 'step': 11337, 'epoch': 2} {'type': 'loss', 'content': 0.06568232923746109, 'timestamp': '2025-09-10 02:48:50.277628', 'step': 11338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:50.330131', 'step': 11338, 'epoch': 2} {'type': 'loss', 'content': 0.18120694160461426, 'timestamp': '2025-09-10 02:48:50.332433', 'step': 11339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:50.385498', 'step': 11339, 'epoch': 2} {'type': 'loss', 'content': 0.06386896967887878, 'timestamp': '2025-09-10 02:48:50.391646', 'step': 11340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:50.445399', 'step': 11340, 'epoch': 2} {'type': 'loss', 'content': 0.14185810089111328, 'timestamp': '2025-09-10 02:48:50.447711', 'step': 11341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:50.500504', 'step': 11341, 'epoch': 2} {'type': 'loss', 'content': 0.1297892928123474, 'timestamp': '2025-09-10 02:48:50.502797', 'step': 11342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:50.556309', 'step': 11342, 'epoch': 2} {'type': 'loss', 'content': 0.08793806284666061, 'timestamp': '2025-09-10 02:48:50.558472', 'step': 11343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:50.611632', 'step': 11343, 'epoch': 2} {'type': 'loss', 'content': 0.14156536757946014, 'timestamp': '2025-09-10 02:48:50.617787', 'step': 11344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:50.670085', 'step': 11344, 'epoch': 2} {'type': 'loss', 'content': 0.14938488602638245, 'timestamp': '2025-09-10 02:48:50.672753', 'step': 11345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:50.726034', 'step': 11345, 'epoch': 2} {'type': 'loss', 'content': 0.0911618024110794, 'timestamp': '2025-09-10 02:48:50.728302', 'step': 11346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:50.781079', 'step': 11346, 'epoch': 2} {'type': 'loss', 'content': 0.09056087583303452, 'timestamp': '2025-09-10 02:48:50.783258', 'step': 11347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:50.835664', 'step': 11347, 'epoch': 2} {'type': 'loss', 'content': 0.13455329835414886, 'timestamp': '2025-09-10 02:48:50.841551', 'step': 11348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:50.894236', 'step': 11348, 'epoch': 2} {'type': 'loss', 'content': 0.18213951587677002, 'timestamp': '2025-09-10 02:48:50.896358', 'step': 11349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:50.950124', 'step': 11349, 'epoch': 2} {'type': 'loss', 'content': 0.1433878391981125, 'timestamp': '2025-09-10 02:48:50.952488', 'step': 11350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:51.006425', 'step': 11350, 'epoch': 2} {'type': 'loss', 'content': 0.1263074278831482, 'timestamp': '2025-09-10 02:48:51.009172', 'step': 11351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:51.062860', 'step': 11351, 'epoch': 2} {'type': 'loss', 'content': 0.16833223402500153, 'timestamp': '2025-09-10 02:48:51.069028', 'step': 11352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:51.122063', 'step': 11352, 'epoch': 2} {'type': 'loss', 'content': 0.06637059897184372, 'timestamp': '2025-09-10 02:48:51.124372', 'step': 11353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:51.177743', 'step': 11353, 'epoch': 2} {'type': 'loss', 'content': 0.12832508981227875, 'timestamp': '2025-09-10 02:48:51.180058', 'step': 11354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:51.232759', 'step': 11354, 'epoch': 2} {'type': 'loss', 'content': 0.13179931044578552, 'timestamp': '2025-09-10 02:48:51.234861', 'step': 11355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:51.290196', 'step': 11355, 'epoch': 2} {'type': 'loss', 'content': 0.08391174674034119, 'timestamp': '2025-09-10 02:48:51.296041', 'step': 11356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:51.348871', 'step': 11356, 'epoch': 2} {'type': 'loss', 'content': 0.11465919017791748, 'timestamp': '2025-09-10 02:48:51.350951', 'step': 11357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:51.403977', 'step': 11357, 'epoch': 2} {'type': 'loss', 'content': 0.1074201837182045, 'timestamp': '2025-09-10 02:48:51.406793', 'step': 11358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:51.461081', 'step': 11358, 'epoch': 2} {'type': 'loss', 'content': 0.11310401558876038, 'timestamp': '2025-09-10 02:48:51.463874', 'step': 11359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:51.518574', 'step': 11359, 'epoch': 2} {'type': 'loss', 'content': 0.1373201459646225, 'timestamp': '2025-09-10 02:48:51.525267', 'step': 11360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:51.577652', 'step': 11360, 'epoch': 2} {'type': 'loss', 'content': 0.15949173271656036, 'timestamp': '2025-09-10 02:48:51.579893', 'step': 11361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:51.632965', 'step': 11361, 'epoch': 2} {'type': 'loss', 'content': 0.08319518715143204, 'timestamp': '2025-09-10 02:48:51.636378', 'step': 11362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:51.690488', 'step': 11362, 'epoch': 2} {'type': 'loss', 'content': 0.10491576045751572, 'timestamp': '2025-09-10 02:48:51.692591', 'step': 11363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:51.746465', 'step': 11363, 'epoch': 2} {'type': 'loss', 'content': 0.07896098494529724, 'timestamp': '2025-09-10 02:48:51.752199', 'step': 11364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:51.804942', 'step': 11364, 'epoch': 2} {'type': 'loss', 'content': 0.11331789940595627, 'timestamp': '2025-09-10 02:48:51.807344', 'step': 11365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:51.862751', 'step': 11365, 'epoch': 2} {'type': 'loss', 'content': 0.11475817859172821, 'timestamp': '2025-09-10 02:48:51.865190', 'step': 11366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:51.919834', 'step': 11366, 'epoch': 2} {'type': 'loss', 'content': 0.14527666568756104, 'timestamp': '2025-09-10 02:48:51.922082', 'step': 11367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:51.975733', 'step': 11367, 'epoch': 2} {'type': 'loss', 'content': 0.052265871316194534, 'timestamp': '2025-09-10 02:48:51.981645', 'step': 11368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:52.035879', 'step': 11368, 'epoch': 2} {'type': 'loss', 'content': 0.1702176332473755, 'timestamp': '2025-09-10 02:48:52.037982', 'step': 11369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:52.091457', 'step': 11369, 'epoch': 2} {'type': 'loss', 'content': 0.15281985700130463, 'timestamp': '2025-09-10 02:48:52.093848', 'step': 11370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:52.147781', 'step': 11370, 'epoch': 2} {'type': 'loss', 'content': 0.11055092513561249, 'timestamp': '2025-09-10 02:48:52.149926', 'step': 11371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:52.203369', 'step': 11371, 'epoch': 2} {'type': 'loss', 'content': 0.10923434048891068, 'timestamp': '2025-09-10 02:48:52.209568', 'step': 11372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:52.262525', 'step': 11372, 'epoch': 2} {'type': 'loss', 'content': 0.09037663042545319, 'timestamp': '2025-09-10 02:48:52.264882', 'step': 11373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:52.317752', 'step': 11373, 'epoch': 2} {'type': 'loss', 'content': 0.09233243018388748, 'timestamp': '2025-09-10 02:48:52.320052', 'step': 11374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:52.374060', 'step': 11374, 'epoch': 2} {'type': 'loss', 'content': 0.15657293796539307, 'timestamp': '2025-09-10 02:48:52.376111', 'step': 11375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:52.429297', 'step': 11375, 'epoch': 2} {'type': 'loss', 'content': 0.06769230216741562, 'timestamp': '2025-09-10 02:48:52.435282', 'step': 11376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:52.487351', 'step': 11376, 'epoch': 2} {'type': 'loss', 'content': 0.15120892226696014, 'timestamp': '2025-09-10 02:48:52.489563', 'step': 11377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:52.542337', 'step': 11377, 'epoch': 2} {'type': 'loss', 'content': 0.19212420284748077, 'timestamp': '2025-09-10 02:48:52.544560', 'step': 11378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:52.598091', 'step': 11378, 'epoch': 2} {'type': 'loss', 'content': 0.21377316117286682, 'timestamp': '2025-09-10 02:48:52.600375', 'step': 11379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:52.653298', 'step': 11379, 'epoch': 2} {'type': 'loss', 'content': 0.1508321464061737, 'timestamp': '2025-09-10 02:48:52.659665', 'step': 11380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:52.713705', 'step': 11380, 'epoch': 2} {'type': 'loss', 'content': 0.10090122371912003, 'timestamp': '2025-09-10 02:48:52.715930', 'step': 11381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:52.768651', 'step': 11381, 'epoch': 2} {'type': 'loss', 'content': 0.1472516506910324, 'timestamp': '2025-09-10 02:48:52.770953', 'step': 11382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:52.823784', 'step': 11382, 'epoch': 2} {'type': 'loss', 'content': 0.1213565319776535, 'timestamp': '2025-09-10 02:48:52.825979', 'step': 11383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:52.878653', 'step': 11383, 'epoch': 2} {'type': 'loss', 'content': 0.10228416323661804, 'timestamp': '2025-09-10 02:48:52.884975', 'step': 11384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:52.970528', 'step': 11384, 'epoch': 2} {'type': 'loss', 'content': 0.07567428797483444, 'timestamp': '2025-09-10 02:48:52.972867', 'step': 11385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:53.048414', 'step': 11385, 'epoch': 2} {'type': 'loss', 'content': 0.15293192863464355, 'timestamp': '2025-09-10 02:48:53.050705', 'step': 11386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:53.110851', 'step': 11386, 'epoch': 2} {'type': 'loss', 'content': 0.08870739489793777, 'timestamp': '2025-09-10 02:48:53.113131', 'step': 11387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:53.177032', 'step': 11387, 'epoch': 2} {'type': 'loss', 'content': 0.09780292212963104, 'timestamp': '2025-09-10 02:48:53.183538', 'step': 11388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:53.236876', 'step': 11388, 'epoch': 2} {'type': 'loss', 'content': 0.08111945539712906, 'timestamp': '2025-09-10 02:48:53.239051', 'step': 11389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:53.293254', 'step': 11389, 'epoch': 2} {'type': 'loss', 'content': 0.08518499881029129, 'timestamp': '2025-09-10 02:48:53.295346', 'step': 11390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:53.351783', 'step': 11390, 'epoch': 2} {'type': 'loss', 'content': 0.16053235530853271, 'timestamp': '2025-09-10 02:48:53.353915', 'step': 11391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:53.407810', 'step': 11391, 'epoch': 2} {'type': 'loss', 'content': 0.12568947672843933, 'timestamp': '2025-09-10 02:48:53.413962', 'step': 11392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:53.471959', 'step': 11392, 'epoch': 2} {'type': 'loss', 'content': 0.14347054064273834, 'timestamp': '2025-09-10 02:48:53.474080', 'step': 11393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:53.528424', 'step': 11393, 'epoch': 2} {'type': 'loss', 'content': 0.11672615259885788, 'timestamp': '2025-09-10 02:48:53.530504', 'step': 11394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:53.585990', 'step': 11394, 'epoch': 2} {'type': 'loss', 'content': 0.16813212633132935, 'timestamp': '2025-09-10 02:48:53.588092', 'step': 11395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:53.644876', 'step': 11395, 'epoch': 2} {'type': 'loss', 'content': 0.05174316465854645, 'timestamp': '2025-09-10 02:48:53.651578', 'step': 11396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:53.706469', 'step': 11396, 'epoch': 2} {'type': 'loss', 'content': 0.15158163011074066, 'timestamp': '2025-09-10 02:48:53.708731', 'step': 11397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:53.763061', 'step': 11397, 'epoch': 2} {'type': 'loss', 'content': 0.11807554215192795, 'timestamp': '2025-09-10 02:48:53.765209', 'step': 11398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:53.819710', 'step': 11398, 'epoch': 2} {'type': 'loss', 'content': 0.14021602272987366, 'timestamp': '2025-09-10 02:48:53.821819', 'step': 11399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:53.876402', 'step': 11399, 'epoch': 2} {'type': 'loss', 'content': 0.1081850528717041, 'timestamp': '2025-09-10 02:48:53.882438', 'step': 11400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:53.934942', 'step': 11400, 'epoch': 2} {'type': 'loss', 'content': 0.14253196120262146, 'timestamp': '2025-09-10 02:48:53.937061', 'step': 11401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:53.990531', 'step': 11401, 'epoch': 2} {'type': 'loss', 'content': 0.18712270259857178, 'timestamp': '2025-09-10 02:48:53.992653', 'step': 11402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:54.046660', 'step': 11402, 'epoch': 2} {'type': 'loss', 'content': 0.16362404823303223, 'timestamp': '2025-09-10 02:48:54.048865', 'step': 11403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:54.103392', 'step': 11403, 'epoch': 2} {'type': 'loss', 'content': 0.13512395322322845, 'timestamp': '2025-09-10 02:48:54.109522', 'step': 11404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:54.163009', 'step': 11404, 'epoch': 2} {'type': 'loss', 'content': 0.1893664002418518, 'timestamp': '2025-09-10 02:48:54.165375', 'step': 11405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:54.218144', 'step': 11405, 'epoch': 2} {'type': 'loss', 'content': 0.18692760169506073, 'timestamp': '2025-09-10 02:48:54.220330', 'step': 11406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:54.276293', 'step': 11406, 'epoch': 2} {'type': 'loss', 'content': 0.1275850385427475, 'timestamp': '2025-09-10 02:48:54.278726', 'step': 11407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:54.336855', 'step': 11407, 'epoch': 2} {'type': 'loss', 'content': 0.14394372701644897, 'timestamp': '2025-09-10 02:48:54.343465', 'step': 11408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:54.398521', 'step': 11408, 'epoch': 2} {'type': 'loss', 'content': 0.10538376867771149, 'timestamp': '2025-09-10 02:48:54.400764', 'step': 11409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:54.454887', 'step': 11409, 'epoch': 2} {'type': 'loss', 'content': 0.10684176534414291, 'timestamp': '2025-09-10 02:48:54.457230', 'step': 11410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:54.512179', 'step': 11410, 'epoch': 2} {'type': 'loss', 'content': 0.2415684312582016, 'timestamp': '2025-09-10 02:48:54.514842', 'step': 11411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:54.568416', 'step': 11411, 'epoch': 2} {'type': 'loss', 'content': 0.10408112406730652, 'timestamp': '2025-09-10 02:48:54.574420', 'step': 11412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:54.627273', 'step': 11412, 'epoch': 2} {'type': 'loss', 'content': 0.13669243454933167, 'timestamp': '2025-09-10 02:48:54.629542', 'step': 11413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:54.683364', 'step': 11413, 'epoch': 2} {'type': 'loss', 'content': 0.07168291509151459, 'timestamp': '2025-09-10 02:48:54.685723', 'step': 11414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:54.740455', 'step': 11414, 'epoch': 2} {'type': 'loss', 'content': 0.11523056775331497, 'timestamp': '2025-09-10 02:48:54.742730', 'step': 11415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:54.809504', 'step': 11415, 'epoch': 2} {'type': 'loss', 'content': 0.07674967497587204, 'timestamp': '2025-09-10 02:48:54.815773', 'step': 11416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:54.869310', 'step': 11416, 'epoch': 2} {'type': 'loss', 'content': 0.11773009598255157, 'timestamp': '2025-09-10 02:48:54.871645', 'step': 11417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:54.925410', 'step': 11417, 'epoch': 2} {'type': 'loss', 'content': 0.14686988294124603, 'timestamp': '2025-09-10 02:48:54.929305', 'step': 11418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:54.984055', 'step': 11418, 'epoch': 2} {'type': 'loss', 'content': 0.11444978415966034, 'timestamp': '2025-09-10 02:48:54.985920', 'step': 11419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:55.039580', 'step': 11419, 'epoch': 2} {'type': 'loss', 'content': 0.052424050867557526, 'timestamp': '2025-09-10 02:48:55.045307', 'step': 11420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:55.098432', 'step': 11420, 'epoch': 2} {'type': 'loss', 'content': 0.17173714935779572, 'timestamp': '2025-09-10 02:48:55.100398', 'step': 11421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:55.154553', 'step': 11421, 'epoch': 2} {'type': 'loss', 'content': 0.1051860824227333, 'timestamp': '2025-09-10 02:48:55.156811', 'step': 11422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:55.210735', 'step': 11422, 'epoch': 2} {'type': 'loss', 'content': 0.15143339335918427, 'timestamp': '2025-09-10 02:48:55.212852', 'step': 11423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:55.267609', 'step': 11423, 'epoch': 2} {'type': 'loss', 'content': 0.10646927356719971, 'timestamp': '2025-09-10 02:48:55.273705', 'step': 11424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:55.327863', 'step': 11424, 'epoch': 2} {'type': 'loss', 'content': 0.09814754128456116, 'timestamp': '2025-09-10 02:48:55.330305', 'step': 11425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:55.384987', 'step': 11425, 'epoch': 2} {'type': 'loss', 'content': 0.2151867151260376, 'timestamp': '2025-09-10 02:48:55.387110', 'step': 11426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:55.444283', 'step': 11426, 'epoch': 2} {'type': 'loss', 'content': 0.08089426904916763, 'timestamp': '2025-09-10 02:48:55.446341', 'step': 11427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:55.505367', 'step': 11427, 'epoch': 2} {'type': 'loss', 'content': 0.1212322786450386, 'timestamp': '2025-09-10 02:48:55.512006', 'step': 11428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:55.567427', 'step': 11428, 'epoch': 2} {'type': 'loss', 'content': 0.21094544231891632, 'timestamp': '2025-09-10 02:48:55.569508', 'step': 11429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:55.630932', 'step': 11429, 'epoch': 2} {'type': 'loss', 'content': 0.17177706956863403, 'timestamp': '2025-09-10 02:48:55.632900', 'step': 11430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:55.689935', 'step': 11430, 'epoch': 2} {'type': 'loss', 'content': 0.08060950040817261, 'timestamp': '2025-09-10 02:48:55.691820', 'step': 11431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:55.747893', 'step': 11431, 'epoch': 2} {'type': 'loss', 'content': 0.12152750790119171, 'timestamp': '2025-09-10 02:48:55.754402', 'step': 11432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:55.810340', 'step': 11432, 'epoch': 2} {'type': 'loss', 'content': 0.16020455956459045, 'timestamp': '2025-09-10 02:48:55.812536', 'step': 11433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:55.869877', 'step': 11433, 'epoch': 2} {'type': 'loss', 'content': 0.12352164834737778, 'timestamp': '2025-09-10 02:48:55.872098', 'step': 11434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:55.929587', 'step': 11434, 'epoch': 2} {'type': 'loss', 'content': 0.12945345044136047, 'timestamp': '2025-09-10 02:48:55.931752', 'step': 11435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:55.986515', 'step': 11435, 'epoch': 2} {'type': 'loss', 'content': 0.08598518371582031, 'timestamp': '2025-09-10 02:48:55.992419', 'step': 11436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:56.046083', 'step': 11436, 'epoch': 2} {'type': 'loss', 'content': 0.13317379355430603, 'timestamp': '2025-09-10 02:48:56.047855', 'step': 11437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:56.101501', 'step': 11437, 'epoch': 2} {'type': 'loss', 'content': 0.11903877556324005, 'timestamp': '2025-09-10 02:48:56.103682', 'step': 11438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:56.158639', 'step': 11438, 'epoch': 2} {'type': 'loss', 'content': 0.14469090104103088, 'timestamp': '2025-09-10 02:48:56.160868', 'step': 11439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:56.217796', 'step': 11439, 'epoch': 2} {'type': 'loss', 'content': 0.1292228102684021, 'timestamp': '2025-09-10 02:48:56.225103', 'step': 11440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:56.285048', 'step': 11440, 'epoch': 2} {'type': 'loss', 'content': 0.07274749875068665, 'timestamp': '2025-09-10 02:48:56.287650', 'step': 11441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:56.348127', 'step': 11441, 'epoch': 2} {'type': 'loss', 'content': 0.12510357797145844, 'timestamp': '2025-09-10 02:48:56.350219', 'step': 11442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:56.405712', 'step': 11442, 'epoch': 2} {'type': 'loss', 'content': 0.10989321768283844, 'timestamp': '2025-09-10 02:48:56.408025', 'step': 11443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:56.463940', 'step': 11443, 'epoch': 2} {'type': 'loss', 'content': 0.07081563025712967, 'timestamp': '2025-09-10 02:48:56.471229', 'step': 11444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:56.524454', 'step': 11444, 'epoch': 2} {'type': 'loss', 'content': 0.1116207018494606, 'timestamp': '2025-09-10 02:48:56.526819', 'step': 11445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:56.580204', 'step': 11445, 'epoch': 2} {'type': 'loss', 'content': 0.06749240309000015, 'timestamp': '2025-09-10 02:48:56.582276', 'step': 11446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:56.636408', 'step': 11446, 'epoch': 2} {'type': 'loss', 'content': 0.12602347135543823, 'timestamp': '2025-09-10 02:48:56.638449', 'step': 11447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:56.692612', 'step': 11447, 'epoch': 2} {'type': 'loss', 'content': 0.06372874230146408, 'timestamp': '2025-09-10 02:48:56.698682', 'step': 11448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:56.752399', 'step': 11448, 'epoch': 2} {'type': 'loss', 'content': 0.12567836046218872, 'timestamp': '2025-09-10 02:48:56.754470', 'step': 11449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:56.810218', 'step': 11449, 'epoch': 2} {'type': 'loss', 'content': 0.08921893686056137, 'timestamp': '2025-09-10 02:48:56.812536', 'step': 11450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:56.869230', 'step': 11450, 'epoch': 2} {'type': 'loss', 'content': 0.1319025605916977, 'timestamp': '2025-09-10 02:48:56.871346', 'step': 11451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:56.924842', 'step': 11451, 'epoch': 2} {'type': 'loss', 'content': 0.07848525792360306, 'timestamp': '2025-09-10 02:48:56.930762', 'step': 11452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:56.983724', 'step': 11452, 'epoch': 2} {'type': 'loss', 'content': 0.0840197280049324, 'timestamp': '2025-09-10 02:48:56.986304', 'step': 11453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:57.039229', 'step': 11453, 'epoch': 2} {'type': 'loss', 'content': 0.09009021520614624, 'timestamp': '2025-09-10 02:48:57.041828', 'step': 11454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:57.095407', 'step': 11454, 'epoch': 2} {'type': 'loss', 'content': 0.09591256827116013, 'timestamp': '2025-09-10 02:48:57.097789', 'step': 11455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:57.151096', 'step': 11455, 'epoch': 2} {'type': 'loss', 'content': 0.14257793128490448, 'timestamp': '2025-09-10 02:48:57.157239', 'step': 11456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:57.209966', 'step': 11456, 'epoch': 2} {'type': 'loss', 'content': 0.08631440997123718, 'timestamp': '2025-09-10 02:48:57.212233', 'step': 11457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:57.264984', 'step': 11457, 'epoch': 2} {'type': 'loss', 'content': 0.21528539061546326, 'timestamp': '2025-09-10 02:48:57.267441', 'step': 11458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:57.320815', 'step': 11458, 'epoch': 2} {'type': 'loss', 'content': 0.11626099795103073, 'timestamp': '2025-09-10 02:48:57.323244', 'step': 11459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:57.378464', 'step': 11459, 'epoch': 2} {'type': 'loss', 'content': 0.16033174097537994, 'timestamp': '2025-09-10 02:48:57.384600', 'step': 11460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:57.436783', 'step': 11460, 'epoch': 2} {'type': 'loss', 'content': 0.11943609267473221, 'timestamp': '2025-09-10 02:48:57.439051', 'step': 11461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:57.492393', 'step': 11461, 'epoch': 2} {'type': 'loss', 'content': 0.10376331955194473, 'timestamp': '2025-09-10 02:48:57.494570', 'step': 11462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:57.547991', 'step': 11462, 'epoch': 2} {'type': 'loss', 'content': 0.11244535446166992, 'timestamp': '2025-09-10 02:48:57.550500', 'step': 11463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:57.604448', 'step': 11463, 'epoch': 2} {'type': 'loss', 'content': 0.12066853046417236, 'timestamp': '2025-09-10 02:48:57.610485', 'step': 11464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:57.663209', 'step': 11464, 'epoch': 2} {'type': 'loss', 'content': 0.1911994218826294, 'timestamp': '2025-09-10 02:48:57.665558', 'step': 11465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:57.719435', 'step': 11465, 'epoch': 2} {'type': 'loss', 'content': 0.15622983872890472, 'timestamp': '2025-09-10 02:48:57.721720', 'step': 11466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:57.775003', 'step': 11466, 'epoch': 2} {'type': 'loss', 'content': 0.17030879855155945, 'timestamp': '2025-09-10 02:48:57.777512', 'step': 11467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:57.830323', 'step': 11467, 'epoch': 2} {'type': 'loss', 'content': 0.14064665138721466, 'timestamp': '2025-09-10 02:48:57.836447', 'step': 11468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:57.889275', 'step': 11468, 'epoch': 2} {'type': 'loss', 'content': 0.172671839594841, 'timestamp': '2025-09-10 02:48:57.891608', 'step': 11469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:57.944573', 'step': 11469, 'epoch': 2} {'type': 'loss', 'content': 0.08729103952646255, 'timestamp': '2025-09-10 02:48:57.946870', 'step': 11470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:57.999834', 'step': 11470, 'epoch': 2} {'type': 'loss', 'content': 0.12092434614896774, 'timestamp': '2025-09-10 02:48:58.002070', 'step': 11471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:58.054909', 'step': 11471, 'epoch': 2} {'type': 'loss', 'content': 0.09361875057220459, 'timestamp': '2025-09-10 02:48:58.060909', 'step': 11472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:58.112904', 'step': 11472, 'epoch': 2} {'type': 'loss', 'content': 0.23988744616508484, 'timestamp': '2025-09-10 02:48:58.115016', 'step': 11473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:58.167416', 'step': 11473, 'epoch': 2} {'type': 'loss', 'content': 0.10020847618579865, 'timestamp': '2025-09-10 02:48:58.170830', 'step': 11474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:58.223770', 'step': 11474, 'epoch': 2} {'type': 'loss', 'content': 0.12259560078382492, 'timestamp': '2025-09-10 02:48:58.226051', 'step': 11475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:58.281288', 'step': 11475, 'epoch': 2} {'type': 'loss', 'content': 0.16363005340099335, 'timestamp': '2025-09-10 02:48:58.287181', 'step': 11476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:58.340921', 'step': 11476, 'epoch': 2} {'type': 'loss', 'content': 0.0637926459312439, 'timestamp': '2025-09-10 02:48:58.343206', 'step': 11477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:58.396068', 'step': 11477, 'epoch': 2} {'type': 'loss', 'content': 0.16095267236232758, 'timestamp': '2025-09-10 02:48:58.400422', 'step': 11478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:58.455107', 'step': 11478, 'epoch': 2} {'type': 'loss', 'content': 0.1411127746105194, 'timestamp': '2025-09-10 02:48:58.458018', 'step': 11479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:58.513058', 'step': 11479, 'epoch': 2} {'type': 'loss', 'content': 0.07913686335086823, 'timestamp': '2025-09-10 02:48:58.520015', 'step': 11480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:58.572408', 'step': 11480, 'epoch': 2} {'type': 'loss', 'content': 0.12115279585123062, 'timestamp': '2025-09-10 02:48:58.574672', 'step': 11481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:58.627310', 'step': 11481, 'epoch': 2} {'type': 'loss', 'content': 0.2595278322696686, 'timestamp': '2025-09-10 02:48:58.631600', 'step': 11482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:58.692391', 'step': 11482, 'epoch': 2} {'type': 'loss', 'content': 0.11510960012674332, 'timestamp': '2025-09-10 02:48:58.696845', 'step': 11483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:58.753289', 'step': 11483, 'epoch': 2} {'type': 'loss', 'content': 0.1470576524734497, 'timestamp': '2025-09-10 02:48:58.759269', 'step': 11484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:58.814990', 'step': 11484, 'epoch': 2} {'type': 'loss', 'content': 0.07241671532392502, 'timestamp': '2025-09-10 02:48:58.817278', 'step': 11485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:58.875722', 'step': 11485, 'epoch': 2} {'type': 'loss', 'content': 0.1286732703447342, 'timestamp': '2025-09-10 02:48:58.878114', 'step': 11486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:58.930988', 'step': 11486, 'epoch': 2} {'type': 'loss', 'content': 0.07401841878890991, 'timestamp': '2025-09-10 02:48:58.933341', 'step': 11487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:58.991036', 'step': 11487, 'epoch': 2} {'type': 'loss', 'content': 0.11496216803789139, 'timestamp': '2025-09-10 02:48:58.997307', 'step': 11488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:59.050165', 'step': 11488, 'epoch': 2} {'type': 'loss', 'content': 0.16080456972122192, 'timestamp': '2025-09-10 02:48:59.052543', 'step': 11489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:59.105459', 'step': 11489, 'epoch': 2} {'type': 'loss', 'content': 0.11226733773946762, 'timestamp': '2025-09-10 02:48:59.107853', 'step': 11490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:59.161223', 'step': 11490, 'epoch': 2} {'type': 'loss', 'content': 0.10539742559194565, 'timestamp': '2025-09-10 02:48:59.163617', 'step': 11491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:59.217282', 'step': 11491, 'epoch': 2} {'type': 'loss', 'content': 0.1360034942626953, 'timestamp': '2025-09-10 02:48:59.223281', 'step': 11492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:59.275837', 'step': 11492, 'epoch': 2} {'type': 'loss', 'content': 0.15464730560779572, 'timestamp': '2025-09-10 02:48:59.278245', 'step': 11493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:59.330556', 'step': 11493, 'epoch': 2} {'type': 'loss', 'content': 0.12344672530889511, 'timestamp': '2025-09-10 02:48:59.332715', 'step': 11494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:59.385997', 'step': 11494, 'epoch': 2} {'type': 'loss', 'content': 0.13599267601966858, 'timestamp': '2025-09-10 02:48:59.388336', 'step': 11495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:48:59.441525', 'step': 11495, 'epoch': 2} {'type': 'loss', 'content': 0.15258528292179108, 'timestamp': '2025-09-10 02:48:59.447559', 'step': 11496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:59.503492', 'step': 11496, 'epoch': 2} {'type': 'loss', 'content': 0.14972513914108276, 'timestamp': '2025-09-10 02:48:59.505740', 'step': 11497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:59.561461', 'step': 11497, 'epoch': 2} {'type': 'loss', 'content': 0.1000337228178978, 'timestamp': '2025-09-10 02:48:59.563598', 'step': 11498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:48:59.616644', 'step': 11498, 'epoch': 2} {'type': 'loss', 'content': 0.1270526796579361, 'timestamp': '2025-09-10 02:48:59.619069', 'step': 11499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:48:59.672453', 'step': 11499, 'epoch': 2} {'type': 'loss', 'content': 0.06420907378196716, 'timestamp': '2025-09-10 02:48:59.678656', 'step': 11500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 11500', 'timestamp': '2025-09-10 02:49:00.111060', 'step': 11500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:00.170358', 'step': 11500, 'epoch': 2} {'type': 'loss', 'content': 0.14080044627189636, 'timestamp': '2025-09-10 02:49:00.172760', 'step': 11501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:00.228180', 'step': 11501, 'epoch': 2} {'type': 'loss', 'content': 0.09039904177188873, 'timestamp': '2025-09-10 02:49:00.230455', 'step': 11502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:00.288160', 'step': 11502, 'epoch': 2} {'type': 'loss', 'content': 0.17613908648490906, 'timestamp': '2025-09-10 02:49:00.290487', 'step': 11503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:00.344007', 'step': 11503, 'epoch': 2} {'type': 'loss', 'content': 0.11131778359413147, 'timestamp': '2025-09-10 02:49:00.350736', 'step': 11504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:00.404881', 'step': 11504, 'epoch': 2} {'type': 'loss', 'content': 0.09449237585067749, 'timestamp': '2025-09-10 02:49:00.407458', 'step': 11505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:00.463442', 'step': 11505, 'epoch': 2} {'type': 'loss', 'content': 0.06122586131095886, 'timestamp': '2025-09-10 02:49:00.467133', 'step': 11506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:00.522161', 'step': 11506, 'epoch': 2} {'type': 'loss', 'content': 0.08631367236375809, 'timestamp': '2025-09-10 02:49:00.524140', 'step': 11507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:00.578087', 'step': 11507, 'epoch': 2} {'type': 'loss', 'content': 0.1795840859413147, 'timestamp': '2025-09-10 02:49:00.583979', 'step': 11508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:00.637012', 'step': 11508, 'epoch': 2} {'type': 'loss', 'content': 0.08095069974660873, 'timestamp': '2025-09-10 02:49:00.639396', 'step': 11509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:00.693938', 'step': 11509, 'epoch': 2} {'type': 'loss', 'content': 0.1729993373155594, 'timestamp': '2025-09-10 02:49:00.696556', 'step': 11510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:00.751618', 'step': 11510, 'epoch': 2} {'type': 'loss', 'content': 0.07919837534427643, 'timestamp': '2025-09-10 02:49:00.754081', 'step': 11511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:00.808186', 'step': 11511, 'epoch': 2} {'type': 'loss', 'content': 0.1405445635318756, 'timestamp': '2025-09-10 02:49:00.814860', 'step': 11512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:00.868028', 'step': 11512, 'epoch': 2} {'type': 'loss', 'content': 0.0701705738902092, 'timestamp': '2025-09-10 02:49:00.870308', 'step': 11513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:00.923717', 'step': 11513, 'epoch': 2} {'type': 'loss', 'content': 0.15036962926387787, 'timestamp': '2025-09-10 02:49:00.925979', 'step': 11514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:00.979911', 'step': 11514, 'epoch': 2} {'type': 'loss', 'content': 0.11661320179700851, 'timestamp': '2025-09-10 02:49:00.982246', 'step': 11515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:01.036063', 'step': 11515, 'epoch': 2} {'type': 'loss', 'content': 0.094386987388134, 'timestamp': '2025-09-10 02:49:01.042383', 'step': 11516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:01.097906', 'step': 11516, 'epoch': 2} {'type': 'loss', 'content': 0.17268212139606476, 'timestamp': '2025-09-10 02:49:01.100235', 'step': 11517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:01.153895', 'step': 11517, 'epoch': 2} {'type': 'loss', 'content': 0.16264763474464417, 'timestamp': '2025-09-10 02:49:01.156176', 'step': 11518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:01.210208', 'step': 11518, 'epoch': 2} {'type': 'loss', 'content': 0.1420837789773941, 'timestamp': '2025-09-10 02:49:01.212665', 'step': 11519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:01.265595', 'step': 11519, 'epoch': 2} {'type': 'loss', 'content': 0.16046929359436035, 'timestamp': '2025-09-10 02:49:01.271924', 'step': 11520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:01.325953', 'step': 11520, 'epoch': 2} {'type': 'loss', 'content': 0.20039497315883636, 'timestamp': '2025-09-10 02:49:01.328233', 'step': 11521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:01.381211', 'step': 11521, 'epoch': 2} {'type': 'loss', 'content': 0.14976690709590912, 'timestamp': '2025-09-10 02:49:01.383546', 'step': 11522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:01.436812', 'step': 11522, 'epoch': 2} {'type': 'loss', 'content': 0.16360241174697876, 'timestamp': '2025-09-10 02:49:01.440130', 'step': 11523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:01.494396', 'step': 11523, 'epoch': 2} {'type': 'loss', 'content': 0.1965440809726715, 'timestamp': '2025-09-10 02:49:01.500698', 'step': 11524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:01.553956', 'step': 11524, 'epoch': 2} {'type': 'loss', 'content': 0.08907153457403183, 'timestamp': '2025-09-10 02:49:01.556405', 'step': 11525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:01.610147', 'step': 11525, 'epoch': 2} {'type': 'loss', 'content': 0.13786378502845764, 'timestamp': '2025-09-10 02:49:01.612546', 'step': 11526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:01.665703', 'step': 11526, 'epoch': 2} {'type': 'loss', 'content': 0.1255478858947754, 'timestamp': '2025-09-10 02:49:01.667716', 'step': 11527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:01.722429', 'step': 11527, 'epoch': 2} {'type': 'loss', 'content': 0.11639166623353958, 'timestamp': '2025-09-10 02:49:01.728565', 'step': 11528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:01.784807', 'step': 11528, 'epoch': 2} {'type': 'loss', 'content': 0.1507086604833603, 'timestamp': '2025-09-10 02:49:01.787129', 'step': 11529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:01.846659', 'step': 11529, 'epoch': 2} {'type': 'loss', 'content': 0.08716292679309845, 'timestamp': '2025-09-10 02:49:01.849060', 'step': 11530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:01.911157', 'step': 11530, 'epoch': 2} {'type': 'loss', 'content': 0.09004400670528412, 'timestamp': '2025-09-10 02:49:01.913507', 'step': 11531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:01.967856', 'step': 11531, 'epoch': 2} {'type': 'loss', 'content': 0.12120295315980911, 'timestamp': '2025-09-10 02:49:01.974323', 'step': 11532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:02.027795', 'step': 11532, 'epoch': 2} {'type': 'loss', 'content': 0.1545284241437912, 'timestamp': '2025-09-10 02:49:02.030083', 'step': 11533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:02.083731', 'step': 11533, 'epoch': 2} {'type': 'loss', 'content': 0.07340501993894577, 'timestamp': '2025-09-10 02:49:02.086027', 'step': 11534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:02.139837', 'step': 11534, 'epoch': 2} {'type': 'loss', 'content': 0.14373832941055298, 'timestamp': '2025-09-10 02:49:02.142321', 'step': 11535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:02.196469', 'step': 11535, 'epoch': 2} {'type': 'loss', 'content': 0.1337101012468338, 'timestamp': '2025-09-10 02:49:02.202835', 'step': 11536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:02.255891', 'step': 11536, 'epoch': 2} {'type': 'loss', 'content': 0.16051389276981354, 'timestamp': '2025-09-10 02:49:02.258299', 'step': 11537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:02.317673', 'step': 11537, 'epoch': 2} {'type': 'loss', 'content': 0.11371191591024399, 'timestamp': '2025-09-10 02:49:02.320000', 'step': 11538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:02.376796', 'step': 11538, 'epoch': 2} {'type': 'loss', 'content': 0.07321707159280777, 'timestamp': '2025-09-10 02:49:02.379275', 'step': 11539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:02.434641', 'step': 11539, 'epoch': 2} {'type': 'loss', 'content': 0.21185895800590515, 'timestamp': '2025-09-10 02:49:02.441206', 'step': 11540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:02.494980', 'step': 11540, 'epoch': 2} {'type': 'loss', 'content': 0.1102878525853157, 'timestamp': '2025-09-10 02:49:02.497281', 'step': 11541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:02.553265', 'step': 11541, 'epoch': 2} {'type': 'loss', 'content': 0.16190195083618164, 'timestamp': '2025-09-10 02:49:02.555602', 'step': 11542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:02.610781', 'step': 11542, 'epoch': 2} {'type': 'loss', 'content': 0.07193781435489655, 'timestamp': '2025-09-10 02:49:02.613079', 'step': 11543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:02.668672', 'step': 11543, 'epoch': 2} {'type': 'loss', 'content': 0.1343170702457428, 'timestamp': '2025-09-10 02:49:02.675035', 'step': 11544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:02.730124', 'step': 11544, 'epoch': 2} {'type': 'loss', 'content': 0.14249922335147858, 'timestamp': '2025-09-10 02:49:02.732502', 'step': 11545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:02.787139', 'step': 11545, 'epoch': 2} {'type': 'loss', 'content': 0.1006571426987648, 'timestamp': '2025-09-10 02:49:02.789442', 'step': 11546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:02.843721', 'step': 11546, 'epoch': 2} {'type': 'loss', 'content': 0.16424554586410522, 'timestamp': '2025-09-10 02:49:02.845983', 'step': 11547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:02.899086', 'step': 11547, 'epoch': 2} {'type': 'loss', 'content': 0.16310931742191315, 'timestamp': '2025-09-10 02:49:02.905236', 'step': 11548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:02.959101', 'step': 11548, 'epoch': 2} {'type': 'loss', 'content': 0.09175500273704529, 'timestamp': '2025-09-10 02:49:02.961540', 'step': 11549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:03.018102', 'step': 11549, 'epoch': 2} {'type': 'loss', 'content': 0.039960168302059174, 'timestamp': '2025-09-10 02:49:03.020502', 'step': 11550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:03.075428', 'step': 11550, 'epoch': 2} {'type': 'loss', 'content': 0.06829974055290222, 'timestamp': '2025-09-10 02:49:03.077687', 'step': 11551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:03.132492', 'step': 11551, 'epoch': 2} {'type': 'loss', 'content': 0.0767558217048645, 'timestamp': '2025-09-10 02:49:03.138977', 'step': 11552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:03.194471', 'step': 11552, 'epoch': 2} {'type': 'loss', 'content': 0.11440996080636978, 'timestamp': '2025-09-10 02:49:03.196993', 'step': 11553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:03.253349', 'step': 11553, 'epoch': 2} {'type': 'loss', 'content': 0.08586087822914124, 'timestamp': '2025-09-10 02:49:03.255736', 'step': 11554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:03.311249', 'step': 11554, 'epoch': 2} {'type': 'loss', 'content': 0.09942089021205902, 'timestamp': '2025-09-10 02:49:03.313805', 'step': 11555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:03.369756', 'step': 11555, 'epoch': 2} {'type': 'loss', 'content': 0.15129229426383972, 'timestamp': '2025-09-10 02:49:03.376321', 'step': 11556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:03.430893', 'step': 11556, 'epoch': 2} {'type': 'loss', 'content': 0.0873798057436943, 'timestamp': '2025-09-10 02:49:03.433122', 'step': 11557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:03.488088', 'step': 11557, 'epoch': 2} {'type': 'loss', 'content': 0.1335064321756363, 'timestamp': '2025-09-10 02:49:03.490435', 'step': 11558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:03.544867', 'step': 11558, 'epoch': 2} {'type': 'loss', 'content': 0.20689313113689423, 'timestamp': '2025-09-10 02:49:03.547221', 'step': 11559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:03.602943', 'step': 11559, 'epoch': 2} {'type': 'loss', 'content': 0.08913882821798325, 'timestamp': '2025-09-10 02:49:03.609156', 'step': 11560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:03.662287', 'step': 11560, 'epoch': 2} {'type': 'loss', 'content': 0.11620394885540009, 'timestamp': '2025-09-10 02:49:03.664570', 'step': 11561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:03.718111', 'step': 11561, 'epoch': 2} {'type': 'loss', 'content': 0.12460781633853912, 'timestamp': '2025-09-10 02:49:03.720465', 'step': 11562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:03.774242', 'step': 11562, 'epoch': 2} {'type': 'loss', 'content': 0.1447928249835968, 'timestamp': '2025-09-10 02:49:03.776580', 'step': 11563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:03.831119', 'step': 11563, 'epoch': 2} {'type': 'loss', 'content': 0.08299778401851654, 'timestamp': '2025-09-10 02:49:03.837804', 'step': 11564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:03.892978', 'step': 11564, 'epoch': 2} {'type': 'loss', 'content': 0.07354116439819336, 'timestamp': '2025-09-10 02:49:03.895353', 'step': 11565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:03.949248', 'step': 11565, 'epoch': 2} {'type': 'loss', 'content': 0.1358126550912857, 'timestamp': '2025-09-10 02:49:03.951459', 'step': 11566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:04.007123', 'step': 11566, 'epoch': 2} {'type': 'loss', 'content': 0.1757543534040451, 'timestamp': '2025-09-10 02:49:04.009517', 'step': 11567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:04.063732', 'step': 11567, 'epoch': 2} {'type': 'loss', 'content': 0.13205720484256744, 'timestamp': '2025-09-10 02:49:04.070108', 'step': 11568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:04.122596', 'step': 11568, 'epoch': 2} {'type': 'loss', 'content': 0.1413751244544983, 'timestamp': '2025-09-10 02:49:04.124729', 'step': 11569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:04.179996', 'step': 11569, 'epoch': 2} {'type': 'loss', 'content': 0.09926552325487137, 'timestamp': '2025-09-10 02:49:04.182123', 'step': 11570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:04.235266', 'step': 11570, 'epoch': 2} {'type': 'loss', 'content': 0.24397780001163483, 'timestamp': '2025-09-10 02:49:04.237481', 'step': 11571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:04.290518', 'step': 11571, 'epoch': 2} {'type': 'loss', 'content': 0.08396223187446594, 'timestamp': '2025-09-10 02:49:04.296744', 'step': 11572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:04.350055', 'step': 11572, 'epoch': 2} {'type': 'loss', 'content': 0.12180563807487488, 'timestamp': '2025-09-10 02:49:04.352396', 'step': 11573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:04.405464', 'step': 11573, 'epoch': 2} {'type': 'loss', 'content': 0.09993066638708115, 'timestamp': '2025-09-10 02:49:04.407595', 'step': 11574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:04.461336', 'step': 11574, 'epoch': 2} {'type': 'loss', 'content': 0.11601541936397552, 'timestamp': '2025-09-10 02:49:04.463668', 'step': 11575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:04.517129', 'step': 11575, 'epoch': 2} {'type': 'loss', 'content': 0.03336622565984726, 'timestamp': '2025-09-10 02:49:04.523441', 'step': 11576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:04.575949', 'step': 11576, 'epoch': 2} {'type': 'loss', 'content': 0.14466579258441925, 'timestamp': '2025-09-10 02:49:04.578344', 'step': 11577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:04.631541', 'step': 11577, 'epoch': 2} {'type': 'loss', 'content': 0.08282987028360367, 'timestamp': '2025-09-10 02:49:04.633848', 'step': 11578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:04.688487', 'step': 11578, 'epoch': 2} {'type': 'loss', 'content': 0.10645744949579239, 'timestamp': '2025-09-10 02:49:04.690744', 'step': 11579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:04.744987', 'step': 11579, 'epoch': 2} {'type': 'loss', 'content': 0.10374953597784042, 'timestamp': '2025-09-10 02:49:04.751389', 'step': 11580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:04.805237', 'step': 11580, 'epoch': 2} {'type': 'loss', 'content': 0.16155463457107544, 'timestamp': '2025-09-10 02:49:04.807487', 'step': 11581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:04.862421', 'step': 11581, 'epoch': 2} {'type': 'loss', 'content': 0.10799498111009598, 'timestamp': '2025-09-10 02:49:04.864952', 'step': 11582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:04.920252', 'step': 11582, 'epoch': 2} {'type': 'loss', 'content': 0.14350880682468414, 'timestamp': '2025-09-10 02:49:04.922759', 'step': 11583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:04.976438', 'step': 11583, 'epoch': 2} {'type': 'loss', 'content': 0.12569968402385712, 'timestamp': '2025-09-10 02:49:04.982907', 'step': 11584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:05.036001', 'step': 11584, 'epoch': 2} {'type': 'loss', 'content': 0.15957102179527283, 'timestamp': '2025-09-10 02:49:05.038487', 'step': 11585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:05.092993', 'step': 11585, 'epoch': 2} {'type': 'loss', 'content': 0.1562831550836563, 'timestamp': '2025-09-10 02:49:05.095319', 'step': 11586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:05.149334', 'step': 11586, 'epoch': 2} {'type': 'loss', 'content': 0.10377826541662216, 'timestamp': '2025-09-10 02:49:05.151719', 'step': 11587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:05.205376', 'step': 11587, 'epoch': 2} {'type': 'loss', 'content': 0.06952614337205887, 'timestamp': '2025-09-10 02:49:05.211819', 'step': 11588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:05.266878', 'step': 11588, 'epoch': 2} {'type': 'loss', 'content': 0.039864543825387955, 'timestamp': '2025-09-10 02:49:05.269469', 'step': 11589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:05.327937', 'step': 11589, 'epoch': 2} {'type': 'loss', 'content': 0.1459212303161621, 'timestamp': '2025-09-10 02:49:05.330299', 'step': 11590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:05.386670', 'step': 11590, 'epoch': 2} {'type': 'loss', 'content': 0.1482720673084259, 'timestamp': '2025-09-10 02:49:05.390421', 'step': 11591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:05.451140', 'step': 11591, 'epoch': 2} {'type': 'loss', 'content': 0.09558472782373428, 'timestamp': '2025-09-10 02:49:05.462783', 'step': 11592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:05.519036', 'step': 11592, 'epoch': 2} {'type': 'loss', 'content': 0.14385223388671875, 'timestamp': '2025-09-10 02:49:05.525019', 'step': 11593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:05.585698', 'step': 11593, 'epoch': 2} {'type': 'loss', 'content': 0.15781766176223755, 'timestamp': '2025-09-10 02:49:05.588044', 'step': 11594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:05.642496', 'step': 11594, 'epoch': 2} {'type': 'loss', 'content': 0.12315274775028229, 'timestamp': '2025-09-10 02:49:05.644832', 'step': 11595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:05.698654', 'step': 11595, 'epoch': 2} {'type': 'loss', 'content': 0.15752170979976654, 'timestamp': '2025-09-10 02:49:05.705187', 'step': 11596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:05.761067', 'step': 11596, 'epoch': 2} {'type': 'loss', 'content': 0.09795808792114258, 'timestamp': '2025-09-10 02:49:05.763478', 'step': 11597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:05.822916', 'step': 11597, 'epoch': 2} {'type': 'loss', 'content': 0.15332980453968048, 'timestamp': '2025-09-10 02:49:05.825884', 'step': 11598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:05.881299', 'step': 11598, 'epoch': 2} {'type': 'loss', 'content': 0.15757785737514496, 'timestamp': '2025-09-10 02:49:05.883686', 'step': 11599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:05.940578', 'step': 11599, 'epoch': 2} {'type': 'loss', 'content': 0.07355090975761414, 'timestamp': '2025-09-10 02:49:05.947019', 'step': 11600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:06.008974', 'step': 11600, 'epoch': 2} {'type': 'loss', 'content': 0.059100788086652756, 'timestamp': '2025-09-10 02:49:06.011167', 'step': 11601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:06.065299', 'step': 11601, 'epoch': 2} {'type': 'loss', 'content': 0.053365956991910934, 'timestamp': '2025-09-10 02:49:06.067380', 'step': 11602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:06.121402', 'step': 11602, 'epoch': 2} {'type': 'loss', 'content': 0.08558616787195206, 'timestamp': '2025-09-10 02:49:06.123584', 'step': 11603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:06.176478', 'step': 11603, 'epoch': 2} {'type': 'loss', 'content': 0.12246439605951309, 'timestamp': '2025-09-10 02:49:06.182502', 'step': 11604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:06.234693', 'step': 11604, 'epoch': 2} {'type': 'loss', 'content': 0.1397525668144226, 'timestamp': '2025-09-10 02:49:06.236859', 'step': 11605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:06.289974', 'step': 11605, 'epoch': 2} {'type': 'loss', 'content': 0.1320117563009262, 'timestamp': '2025-09-10 02:49:06.292139', 'step': 11606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:06.345062', 'step': 11606, 'epoch': 2} {'type': 'loss', 'content': 0.09171871095895767, 'timestamp': '2025-09-10 02:49:06.347477', 'step': 11607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:06.400846', 'step': 11607, 'epoch': 2} {'type': 'loss', 'content': 0.07672692835330963, 'timestamp': '2025-09-10 02:49:06.406978', 'step': 11608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:06.460470', 'step': 11608, 'epoch': 2} {'type': 'loss', 'content': 0.07435200363397598, 'timestamp': '2025-09-10 02:49:06.462963', 'step': 11609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:06.517309', 'step': 11609, 'epoch': 2} {'type': 'loss', 'content': 0.11555059254169464, 'timestamp': '2025-09-10 02:49:06.519686', 'step': 11610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:06.573439', 'step': 11610, 'epoch': 2} {'type': 'loss', 'content': 0.11991563439369202, 'timestamp': '2025-09-10 02:49:06.575707', 'step': 11611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:06.628705', 'step': 11611, 'epoch': 2} {'type': 'loss', 'content': 0.06349793821573257, 'timestamp': '2025-09-10 02:49:06.635007', 'step': 11612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:06.687309', 'step': 11612, 'epoch': 2} {'type': 'loss', 'content': 0.11773639172315598, 'timestamp': '2025-09-10 02:49:06.689425', 'step': 11613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:06.742214', 'step': 11613, 'epoch': 2} {'type': 'loss', 'content': 0.1429598480463028, 'timestamp': '2025-09-10 02:49:06.744422', 'step': 11614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:06.797572', 'step': 11614, 'epoch': 2} {'type': 'loss', 'content': 0.05749047175049782, 'timestamp': '2025-09-10 02:49:06.799807', 'step': 11615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:06.853261', 'step': 11615, 'epoch': 2} {'type': 'loss', 'content': 0.11451295763254166, 'timestamp': '2025-09-10 02:49:06.859729', 'step': 11616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:06.912205', 'step': 11616, 'epoch': 2} {'type': 'loss', 'content': 0.17055471241474152, 'timestamp': '2025-09-10 02:49:06.914310', 'step': 11617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:06.967642', 'step': 11617, 'epoch': 2} {'type': 'loss', 'content': 0.09964113682508469, 'timestamp': '2025-09-10 02:49:06.970102', 'step': 11618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:07.027721', 'step': 11618, 'epoch': 2} {'type': 'loss', 'content': 0.14258892834186554, 'timestamp': '2025-09-10 02:49:07.030447', 'step': 11619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:07.084566', 'step': 11619, 'epoch': 2} {'type': 'loss', 'content': 0.15391871333122253, 'timestamp': '2025-09-10 02:49:07.090573', 'step': 11620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:07.143173', 'step': 11620, 'epoch': 2} {'type': 'loss', 'content': 0.10021845251321793, 'timestamp': '2025-09-10 02:49:07.145639', 'step': 11621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:07.199551', 'step': 11621, 'epoch': 2} {'type': 'loss', 'content': 0.1411493867635727, 'timestamp': '2025-09-10 02:49:07.201861', 'step': 11622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:07.255156', 'step': 11622, 'epoch': 2} {'type': 'loss', 'content': 0.12885011732578278, 'timestamp': '2025-09-10 02:49:07.257385', 'step': 11623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:07.310234', 'step': 11623, 'epoch': 2} {'type': 'loss', 'content': 0.03403036296367645, 'timestamp': '2025-09-10 02:49:07.316411', 'step': 11624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:07.369345', 'step': 11624, 'epoch': 2} {'type': 'loss', 'content': 0.10266134142875671, 'timestamp': '2025-09-10 02:49:07.371595', 'step': 11625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:07.424481', 'step': 11625, 'epoch': 2} {'type': 'loss', 'content': 0.13078725337982178, 'timestamp': '2025-09-10 02:49:07.426836', 'step': 11626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:07.480152', 'step': 11626, 'epoch': 2} {'type': 'loss', 'content': 0.1594139188528061, 'timestamp': '2025-09-10 02:49:07.482452', 'step': 11627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:07.535851', 'step': 11627, 'epoch': 2} {'type': 'loss', 'content': 0.0663900226354599, 'timestamp': '2025-09-10 02:49:07.541921', 'step': 11628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:07.594545', 'step': 11628, 'epoch': 2} {'type': 'loss', 'content': 0.05806686729192734, 'timestamp': '2025-09-10 02:49:07.596628', 'step': 11629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:07.650061', 'step': 11629, 'epoch': 2} {'type': 'loss', 'content': 0.09204494208097458, 'timestamp': '2025-09-10 02:49:07.652218', 'step': 11630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:07.706846', 'step': 11630, 'epoch': 2} {'type': 'loss', 'content': 0.05083124712109566, 'timestamp': '2025-09-10 02:49:07.709211', 'step': 11631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:07.763180', 'step': 11631, 'epoch': 2} {'type': 'loss', 'content': 0.10719234496355057, 'timestamp': '2025-09-10 02:49:07.769206', 'step': 11632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:07.821793', 'step': 11632, 'epoch': 2} {'type': 'loss', 'content': 0.10406817495822906, 'timestamp': '2025-09-10 02:49:07.823994', 'step': 11633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:07.876931', 'step': 11633, 'epoch': 2} {'type': 'loss', 'content': 0.05654733255505562, 'timestamp': '2025-09-10 02:49:07.879112', 'step': 11634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:07.933275', 'step': 11634, 'epoch': 2} {'type': 'loss', 'content': 0.07055588811635971, 'timestamp': '2025-09-10 02:49:07.935387', 'step': 11635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:07.989201', 'step': 11635, 'epoch': 2} {'type': 'loss', 'content': 0.06714356690645218, 'timestamp': '2025-09-10 02:49:07.995306', 'step': 11636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:08.047894', 'step': 11636, 'epoch': 2} {'type': 'loss', 'content': 0.12049593031406403, 'timestamp': '2025-09-10 02:49:08.050021', 'step': 11637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:08.104767', 'step': 11637, 'epoch': 2} {'type': 'loss', 'content': 0.10937590152025223, 'timestamp': '2025-09-10 02:49:08.106993', 'step': 11638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:08.162600', 'step': 11638, 'epoch': 2} {'type': 'loss', 'content': 0.1640428900718689, 'timestamp': '2025-09-10 02:49:08.165074', 'step': 11639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:08.220264', 'step': 11639, 'epoch': 2} {'type': 'loss', 'content': 0.09498228132724762, 'timestamp': '2025-09-10 02:49:08.226427', 'step': 11640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:08.279770', 'step': 11640, 'epoch': 2} {'type': 'loss', 'content': 0.04421946406364441, 'timestamp': '2025-09-10 02:49:08.282077', 'step': 11641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:08.338639', 'step': 11641, 'epoch': 2} {'type': 'loss', 'content': 0.09994809329509735, 'timestamp': '2025-09-10 02:49:08.340942', 'step': 11642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:08.396662', 'step': 11642, 'epoch': 2} {'type': 'loss', 'content': 0.16962815821170807, 'timestamp': '2025-09-10 02:49:08.398934', 'step': 11643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:08.455458', 'step': 11643, 'epoch': 2} {'type': 'loss', 'content': 0.08399748802185059, 'timestamp': '2025-09-10 02:49:08.461875', 'step': 11644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:08.517002', 'step': 11644, 'epoch': 2} {'type': 'loss', 'content': 0.11193066835403442, 'timestamp': '2025-09-10 02:49:08.519196', 'step': 11645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:08.576740', 'step': 11645, 'epoch': 2} {'type': 'loss', 'content': 0.07501305639743805, 'timestamp': '2025-09-10 02:49:08.578807', 'step': 11646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:08.634945', 'step': 11646, 'epoch': 2} {'type': 'loss', 'content': 0.10292257368564606, 'timestamp': '2025-09-10 02:49:08.637023', 'step': 11647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:08.690811', 'step': 11647, 'epoch': 2} {'type': 'loss', 'content': 0.08180452138185501, 'timestamp': '2025-09-10 02:49:08.696930', 'step': 11648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:08.749428', 'step': 11648, 'epoch': 2} {'type': 'loss', 'content': 0.07013537734746933, 'timestamp': '2025-09-10 02:49:08.751599', 'step': 11649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:08.805474', 'step': 11649, 'epoch': 2} {'type': 'loss', 'content': 0.13377319276332855, 'timestamp': '2025-09-10 02:49:08.807619', 'step': 11650, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:49:21.735896', 'step': 11650, 'epoch': 2} {'type': 'pplx', 'content': 15892.584330513651, 'timestamp': '2025-09-10 02:49:21.739401', 'step': 11650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:21.794438', 'step': 11650, 'epoch': 2} {'type': 'loss', 'content': 0.09157008677721024, 'timestamp': '2025-09-10 02:49:21.796712', 'step': 11651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:21.852906', 'step': 11651, 'epoch': 2} {'type': 'loss', 'content': 0.12988382577896118, 'timestamp': '2025-09-10 02:49:21.859315', 'step': 11652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:21.913442', 'step': 11652, 'epoch': 2} {'type': 'loss', 'content': 0.10916173458099365, 'timestamp': '2025-09-10 02:49:21.915744', 'step': 11653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:21.970232', 'step': 11653, 'epoch': 2} {'type': 'loss', 'content': 0.1990746408700943, 'timestamp': '2025-09-10 02:49:21.972595', 'step': 11654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:22.027276', 'step': 11654, 'epoch': 2} {'type': 'loss', 'content': 0.11165696382522583, 'timestamp': '2025-09-10 02:49:22.029694', 'step': 11655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:22.085272', 'step': 11655, 'epoch': 2} {'type': 'loss', 'content': 0.11231618374586105, 'timestamp': '2025-09-10 02:49:22.091937', 'step': 11656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:22.148066', 'step': 11656, 'epoch': 2} {'type': 'loss', 'content': 0.10275377333164215, 'timestamp': '2025-09-10 02:49:22.150365', 'step': 11657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:22.207384', 'step': 11657, 'epoch': 2} {'type': 'loss', 'content': 0.08532588183879852, 'timestamp': '2025-09-10 02:49:22.209495', 'step': 11658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:22.265030', 'step': 11658, 'epoch': 2} {'type': 'loss', 'content': 0.1056075319647789, 'timestamp': '2025-09-10 02:49:22.267246', 'step': 11659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:22.321947', 'step': 11659, 'epoch': 2} {'type': 'loss', 'content': 0.12182557582855225, 'timestamp': '2025-09-10 02:49:22.328412', 'step': 11660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:22.385076', 'step': 11660, 'epoch': 2} {'type': 'loss', 'content': 0.08947281539440155, 'timestamp': '2025-09-10 02:49:22.387452', 'step': 11661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:22.442082', 'step': 11661, 'epoch': 2} {'type': 'loss', 'content': 0.23552103340625763, 'timestamp': '2025-09-10 02:49:22.444618', 'step': 11662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:22.499940', 'step': 11662, 'epoch': 2} {'type': 'loss', 'content': 0.16384312510490417, 'timestamp': '2025-09-10 02:49:22.502253', 'step': 11663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:22.556874', 'step': 11663, 'epoch': 2} {'type': 'loss', 'content': 0.19629104435443878, 'timestamp': '2025-09-10 02:49:22.563153', 'step': 11664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:22.617405', 'step': 11664, 'epoch': 2} {'type': 'loss', 'content': 0.10732275992631912, 'timestamp': '2025-09-10 02:49:22.619688', 'step': 11665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:22.674888', 'step': 11665, 'epoch': 2} {'type': 'loss', 'content': 0.12355601042509079, 'timestamp': '2025-09-10 02:49:22.677017', 'step': 11666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:22.733896', 'step': 11666, 'epoch': 2} {'type': 'loss', 'content': 0.10087629407644272, 'timestamp': '2025-09-10 02:49:22.736103', 'step': 11667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:22.791012', 'step': 11667, 'epoch': 2} {'type': 'loss', 'content': 0.08454837650060654, 'timestamp': '2025-09-10 02:49:22.797325', 'step': 11668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:22.851738', 'step': 11668, 'epoch': 2} {'type': 'loss', 'content': 0.1788644790649414, 'timestamp': '2025-09-10 02:49:22.854029', 'step': 11669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:22.908892', 'step': 11669, 'epoch': 2} {'type': 'loss', 'content': 0.16785021126270294, 'timestamp': '2025-09-10 02:49:22.911093', 'step': 11670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:22.968377', 'step': 11670, 'epoch': 2} {'type': 'loss', 'content': 0.056333210319280624, 'timestamp': '2025-09-10 02:49:22.970735', 'step': 11671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:23.025494', 'step': 11671, 'epoch': 2} {'type': 'loss', 'content': 0.24833153188228607, 'timestamp': '2025-09-10 02:49:23.031626', 'step': 11672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:23.087068', 'step': 11672, 'epoch': 2} {'type': 'loss', 'content': 0.06176028773188591, 'timestamp': '2025-09-10 02:49:23.089294', 'step': 11673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:23.144561', 'step': 11673, 'epoch': 2} {'type': 'loss', 'content': 0.07242733240127563, 'timestamp': '2025-09-10 02:49:23.146741', 'step': 11674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:23.203162', 'step': 11674, 'epoch': 2} {'type': 'loss', 'content': 0.1235925480723381, 'timestamp': '2025-09-10 02:49:23.205463', 'step': 11675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:23.260901', 'step': 11675, 'epoch': 2} {'type': 'loss', 'content': 0.15066827833652496, 'timestamp': '2025-09-10 02:49:23.267392', 'step': 11676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:23.321550', 'step': 11676, 'epoch': 2} {'type': 'loss', 'content': 0.12106555700302124, 'timestamp': '2025-09-10 02:49:23.323952', 'step': 11677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:23.378513', 'step': 11677, 'epoch': 2} {'type': 'loss', 'content': 0.1527860462665558, 'timestamp': '2025-09-10 02:49:23.380857', 'step': 11678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:23.435710', 'step': 11678, 'epoch': 2} {'type': 'loss', 'content': 0.14805927872657776, 'timestamp': '2025-09-10 02:49:23.437969', 'step': 11679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:23.493150', 'step': 11679, 'epoch': 2} {'type': 'loss', 'content': 0.10490158945322037, 'timestamp': '2025-09-10 02:49:23.499858', 'step': 11680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:23.553709', 'step': 11680, 'epoch': 2} {'type': 'loss', 'content': 0.09100815653800964, 'timestamp': '2025-09-10 02:49:23.555955', 'step': 11681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:23.611297', 'step': 11681, 'epoch': 2} {'type': 'loss', 'content': 0.07203859835863113, 'timestamp': '2025-09-10 02:49:23.613589', 'step': 11682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:23.669005', 'step': 11682, 'epoch': 2} {'type': 'loss', 'content': 0.16710355877876282, 'timestamp': '2025-09-10 02:49:23.671133', 'step': 11683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:23.727067', 'step': 11683, 'epoch': 2} {'type': 'loss', 'content': 0.13706670701503754, 'timestamp': '2025-09-10 02:49:23.733549', 'step': 11684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:23.791543', 'step': 11684, 'epoch': 2} {'type': 'loss', 'content': 0.09429960697889328, 'timestamp': '2025-09-10 02:49:23.793843', 'step': 11685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:23.849303', 'step': 11685, 'epoch': 2} {'type': 'loss', 'content': 0.16673699021339417, 'timestamp': '2025-09-10 02:49:23.851591', 'step': 11686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:23.906915', 'step': 11686, 'epoch': 2} {'type': 'loss', 'content': 0.1899309903383255, 'timestamp': '2025-09-10 02:49:23.908863', 'step': 11687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:23.964290', 'step': 11687, 'epoch': 2} {'type': 'loss', 'content': 0.16286203265190125, 'timestamp': '2025-09-10 02:49:23.970710', 'step': 11688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:24.026081', 'step': 11688, 'epoch': 2} {'type': 'loss', 'content': 0.059216491878032684, 'timestamp': '2025-09-10 02:49:24.028266', 'step': 11689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:24.082839', 'step': 11689, 'epoch': 2} {'type': 'loss', 'content': 0.1086641177535057, 'timestamp': '2025-09-10 02:49:24.085075', 'step': 11690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:24.139724', 'step': 11690, 'epoch': 2} {'type': 'loss', 'content': 0.08962726593017578, 'timestamp': '2025-09-10 02:49:24.142046', 'step': 11691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:24.197363', 'step': 11691, 'epoch': 2} {'type': 'loss', 'content': 0.11214321106672287, 'timestamp': '2025-09-10 02:49:24.203800', 'step': 11692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:24.258238', 'step': 11692, 'epoch': 2} {'type': 'loss', 'content': 0.17939801514148712, 'timestamp': '2025-09-10 02:49:24.260450', 'step': 11693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:24.315672', 'step': 11693, 'epoch': 2} {'type': 'loss', 'content': 0.17233824729919434, 'timestamp': '2025-09-10 02:49:24.318013', 'step': 11694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:24.373288', 'step': 11694, 'epoch': 2} {'type': 'loss', 'content': 0.21284575760364532, 'timestamp': '2025-09-10 02:49:24.375545', 'step': 11695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:24.431702', 'step': 11695, 'epoch': 2} {'type': 'loss', 'content': 0.1880979835987091, 'timestamp': '2025-09-10 02:49:24.438059', 'step': 11696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:24.491797', 'step': 11696, 'epoch': 2} {'type': 'loss', 'content': 0.13840042054653168, 'timestamp': '2025-09-10 02:49:24.493976', 'step': 11697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:24.548318', 'step': 11697, 'epoch': 2} {'type': 'loss', 'content': 0.10910614579916, 'timestamp': '2025-09-10 02:49:24.550695', 'step': 11698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:24.605099', 'step': 11698, 'epoch': 2} {'type': 'loss', 'content': 0.08120090514421463, 'timestamp': '2025-09-10 02:49:24.607078', 'step': 11699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:24.661529', 'step': 11699, 'epoch': 2} {'type': 'loss', 'content': 0.0346464179456234, 'timestamp': '2025-09-10 02:49:24.667638', 'step': 11700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:24.721714', 'step': 11700, 'epoch': 2} {'type': 'loss', 'content': 0.09112954139709473, 'timestamp': '2025-09-10 02:49:24.723907', 'step': 11701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:24.778923', 'step': 11701, 'epoch': 2} {'type': 'loss', 'content': 0.07563228905200958, 'timestamp': '2025-09-10 02:49:24.781138', 'step': 11702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:24.837393', 'step': 11702, 'epoch': 2} {'type': 'loss', 'content': 0.1714305281639099, 'timestamp': '2025-09-10 02:49:24.839558', 'step': 11703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:24.896865', 'step': 11703, 'epoch': 2} {'type': 'loss', 'content': 0.17007605731487274, 'timestamp': '2025-09-10 02:49:24.903309', 'step': 11704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:24.957220', 'step': 11704, 'epoch': 2} {'type': 'loss', 'content': 0.0846555307507515, 'timestamp': '2025-09-10 02:49:24.959632', 'step': 11705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:25.014654', 'step': 11705, 'epoch': 2} {'type': 'loss', 'content': 0.129225492477417, 'timestamp': '2025-09-10 02:49:25.016737', 'step': 11706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:25.071857', 'step': 11706, 'epoch': 2} {'type': 'loss', 'content': 0.21026813983917236, 'timestamp': '2025-09-10 02:49:25.074125', 'step': 11707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:25.129334', 'step': 11707, 'epoch': 2} {'type': 'loss', 'content': 0.12396866828203201, 'timestamp': '2025-09-10 02:49:25.135625', 'step': 11708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:25.189029', 'step': 11708, 'epoch': 2} {'type': 'loss', 'content': 0.20655739307403564, 'timestamp': '2025-09-10 02:49:25.191287', 'step': 11709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:25.245192', 'step': 11709, 'epoch': 2} {'type': 'loss', 'content': 0.2207050323486328, 'timestamp': '2025-09-10 02:49:25.247047', 'step': 11710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:25.300314', 'step': 11710, 'epoch': 2} {'type': 'loss', 'content': 0.11961224675178528, 'timestamp': '2025-09-10 02:49:25.302362', 'step': 11711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:25.355663', 'step': 11711, 'epoch': 2} {'type': 'loss', 'content': 0.09353731572628021, 'timestamp': '2025-09-10 02:49:25.361807', 'step': 11712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:25.417933', 'step': 11712, 'epoch': 2} {'type': 'loss', 'content': 0.09790635854005814, 'timestamp': '2025-09-10 02:49:25.420126', 'step': 11713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:25.477093', 'step': 11713, 'epoch': 2} {'type': 'loss', 'content': 0.154428169131279, 'timestamp': '2025-09-10 02:49:25.479298', 'step': 11714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:25.533876', 'step': 11714, 'epoch': 2} {'type': 'loss', 'content': 0.1255791187286377, 'timestamp': '2025-09-10 02:49:25.536048', 'step': 11715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:25.589574', 'step': 11715, 'epoch': 2} {'type': 'loss', 'content': 0.09379997104406357, 'timestamp': '2025-09-10 02:49:25.595754', 'step': 11716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:25.649444', 'step': 11716, 'epoch': 2} {'type': 'loss', 'content': 0.170902281999588, 'timestamp': '2025-09-10 02:49:25.651592', 'step': 11717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:25.705681', 'step': 11717, 'epoch': 2} {'type': 'loss', 'content': 0.22153277695178986, 'timestamp': '2025-09-10 02:49:25.707815', 'step': 11718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:25.760928', 'step': 11718, 'epoch': 2} {'type': 'loss', 'content': 0.10703636705875397, 'timestamp': '2025-09-10 02:49:25.763171', 'step': 11719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:25.817561', 'step': 11719, 'epoch': 2} {'type': 'loss', 'content': 0.11209670454263687, 'timestamp': '2025-09-10 02:49:25.823889', 'step': 11720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:25.878376', 'step': 11720, 'epoch': 2} {'type': 'loss', 'content': 0.15794683992862701, 'timestamp': '2025-09-10 02:49:25.880724', 'step': 11721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:25.935893', 'step': 11721, 'epoch': 2} {'type': 'loss', 'content': 0.125654399394989, 'timestamp': '2025-09-10 02:49:25.945176', 'step': 11722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:26.003361', 'step': 11722, 'epoch': 2} {'type': 'loss', 'content': 0.1297580599784851, 'timestamp': '2025-09-10 02:49:26.005784', 'step': 11723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:26.060084', 'step': 11723, 'epoch': 2} {'type': 'loss', 'content': 0.08410920947790146, 'timestamp': '2025-09-10 02:49:26.067091', 'step': 11724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:26.122151', 'step': 11724, 'epoch': 2} {'type': 'loss', 'content': 0.09439337998628616, 'timestamp': '2025-09-10 02:49:26.124394', 'step': 11725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:26.185208', 'step': 11725, 'epoch': 2} {'type': 'loss', 'content': 0.1996970772743225, 'timestamp': '2025-09-10 02:49:26.187304', 'step': 11726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:26.242454', 'step': 11726, 'epoch': 2} {'type': 'loss', 'content': 0.08298555016517639, 'timestamp': '2025-09-10 02:49:26.256738', 'step': 11727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:26.316354', 'step': 11727, 'epoch': 2} {'type': 'loss', 'content': 0.14983771741390228, 'timestamp': '2025-09-10 02:49:26.322681', 'step': 11728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:26.387289', 'step': 11728, 'epoch': 2} {'type': 'loss', 'content': 0.15335862338542938, 'timestamp': '2025-09-10 02:49:26.390829', 'step': 11729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:26.451382', 'step': 11729, 'epoch': 2} {'type': 'loss', 'content': 0.06900916248559952, 'timestamp': '2025-09-10 02:49:26.455860', 'step': 11730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:26.513625', 'step': 11730, 'epoch': 2} {'type': 'loss', 'content': 0.103591687977314, 'timestamp': '2025-09-10 02:49:26.515797', 'step': 11731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:26.570466', 'step': 11731, 'epoch': 2} {'type': 'loss', 'content': 0.13690802454948425, 'timestamp': '2025-09-10 02:49:26.576658', 'step': 11732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:26.630430', 'step': 11732, 'epoch': 2} {'type': 'loss', 'content': 0.125456303358078, 'timestamp': '2025-09-10 02:49:26.632759', 'step': 11733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:26.693416', 'step': 11733, 'epoch': 2} {'type': 'loss', 'content': 0.1694110780954361, 'timestamp': '2025-09-10 02:49:26.695842', 'step': 11734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:26.762932', 'step': 11734, 'epoch': 2} {'type': 'loss', 'content': 0.10030639916658401, 'timestamp': '2025-09-10 02:49:26.765348', 'step': 11735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:26.820015', 'step': 11735, 'epoch': 2} {'type': 'loss', 'content': 0.12626327574253082, 'timestamp': '2025-09-10 02:49:26.827306', 'step': 11736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:26.884981', 'step': 11736, 'epoch': 2} {'type': 'loss', 'content': 0.1076994314789772, 'timestamp': '2025-09-10 02:49:26.887297', 'step': 11737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:26.942072', 'step': 11737, 'epoch': 2} {'type': 'loss', 'content': 0.05368999391794205, 'timestamp': '2025-09-10 02:49:26.945629', 'step': 11738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:27.002007', 'step': 11738, 'epoch': 2} {'type': 'loss', 'content': 0.13449892401695251, 'timestamp': '2025-09-10 02:49:27.004071', 'step': 11739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:27.082386', 'step': 11739, 'epoch': 2} {'type': 'loss', 'content': 0.20463034510612488, 'timestamp': '2025-09-10 02:49:27.091570', 'step': 11740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:27.160150', 'step': 11740, 'epoch': 2} {'type': 'loss', 'content': 0.11015245318412781, 'timestamp': '2025-09-10 02:49:27.162613', 'step': 11741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:27.220797', 'step': 11741, 'epoch': 2} {'type': 'loss', 'content': 0.05271434411406517, 'timestamp': '2025-09-10 02:49:27.222509', 'step': 11742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:27.276618', 'step': 11742, 'epoch': 2} {'type': 'loss', 'content': 0.09926878660917282, 'timestamp': '2025-09-10 02:49:27.278289', 'step': 11743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:27.332630', 'step': 11743, 'epoch': 2} {'type': 'loss', 'content': 0.15339115262031555, 'timestamp': '2025-09-10 02:49:27.341092', 'step': 11744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:27.398633', 'step': 11744, 'epoch': 2} {'type': 'loss', 'content': 0.14695227146148682, 'timestamp': '2025-09-10 02:49:27.400800', 'step': 11745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:27.460142', 'step': 11745, 'epoch': 2} {'type': 'loss', 'content': 0.10380877554416656, 'timestamp': '2025-09-10 02:49:27.465815', 'step': 11746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:27.521339', 'step': 11746, 'epoch': 2} {'type': 'loss', 'content': 0.15706931054592133, 'timestamp': '2025-09-10 02:49:27.523352', 'step': 11747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:27.578796', 'step': 11747, 'epoch': 2} {'type': 'loss', 'content': 0.11159812659025192, 'timestamp': '2025-09-10 02:49:27.585001', 'step': 11748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:27.638995', 'step': 11748, 'epoch': 2} {'type': 'loss', 'content': 0.08791427314281464, 'timestamp': '2025-09-10 02:49:27.641415', 'step': 11749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:27.695881', 'step': 11749, 'epoch': 2} {'type': 'loss', 'content': 0.22147893905639648, 'timestamp': '2025-09-10 02:49:27.698976', 'step': 11750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:27.755887', 'step': 11750, 'epoch': 2} {'type': 'loss', 'content': 0.09354905784130096, 'timestamp': '2025-09-10 02:49:27.758075', 'step': 11751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:27.811880', 'step': 11751, 'epoch': 2} {'type': 'loss', 'content': 0.07519903033971786, 'timestamp': '2025-09-10 02:49:27.819793', 'step': 11752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:27.872902', 'step': 11752, 'epoch': 2} {'type': 'loss', 'content': 0.23494599759578705, 'timestamp': '2025-09-10 02:49:27.877420', 'step': 11753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:27.933684', 'step': 11753, 'epoch': 2} {'type': 'loss', 'content': 0.09523314237594604, 'timestamp': '2025-09-10 02:49:27.935934', 'step': 11754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:27.991711', 'step': 11754, 'epoch': 2} {'type': 'loss', 'content': 0.08428506553173065, 'timestamp': '2025-09-10 02:49:27.993962', 'step': 11755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:28.049484', 'step': 11755, 'epoch': 2} {'type': 'loss', 'content': 0.11814780533313751, 'timestamp': '2025-09-10 02:49:28.055662', 'step': 11756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:28.109800', 'step': 11756, 'epoch': 2} {'type': 'loss', 'content': 0.1487683355808258, 'timestamp': '2025-09-10 02:49:28.111946', 'step': 11757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:28.165734', 'step': 11757, 'epoch': 2} {'type': 'loss', 'content': 0.10475709289312363, 'timestamp': '2025-09-10 02:49:28.170525', 'step': 11758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:28.226564', 'step': 11758, 'epoch': 2} {'type': 'loss', 'content': 0.17755189538002014, 'timestamp': '2025-09-10 02:49:28.228970', 'step': 11759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:28.284900', 'step': 11759, 'epoch': 2} {'type': 'loss', 'content': 0.14673419296741486, 'timestamp': '2025-09-10 02:49:28.292970', 'step': 11760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:28.347532', 'step': 11760, 'epoch': 2} {'type': 'loss', 'content': 0.1745680868625641, 'timestamp': '2025-09-10 02:49:28.349689', 'step': 11761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:28.405813', 'step': 11761, 'epoch': 2} {'type': 'loss', 'content': 0.07909942418336868, 'timestamp': '2025-09-10 02:49:28.408146', 'step': 11762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:28.471508', 'step': 11762, 'epoch': 2} {'type': 'loss', 'content': 0.13868708908557892, 'timestamp': '2025-09-10 02:49:28.473865', 'step': 11763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:28.528231', 'step': 11763, 'epoch': 2} {'type': 'loss', 'content': 0.08363581448793411, 'timestamp': '2025-09-10 02:49:28.534369', 'step': 11764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:28.588963', 'step': 11764, 'epoch': 2} {'type': 'loss', 'content': 0.14843040704727173, 'timestamp': '2025-09-10 02:49:28.592365', 'step': 11765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:28.647943', 'step': 11765, 'epoch': 2} {'type': 'loss', 'content': 0.06048052757978439, 'timestamp': '2025-09-10 02:49:28.650389', 'step': 11766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:28.706534', 'step': 11766, 'epoch': 2} {'type': 'loss', 'content': 0.0925799235701561, 'timestamp': '2025-09-10 02:49:28.708936', 'step': 11767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:28.762413', 'step': 11767, 'epoch': 2} {'type': 'loss', 'content': 0.13096505403518677, 'timestamp': '2025-09-10 02:49:28.768451', 'step': 11768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:28.821406', 'step': 11768, 'epoch': 2} {'type': 'loss', 'content': 0.11796066164970398, 'timestamp': '2025-09-10 02:49:28.823637', 'step': 11769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:28.877645', 'step': 11769, 'epoch': 2} {'type': 'loss', 'content': 0.1481141448020935, 'timestamp': '2025-09-10 02:49:28.879714', 'step': 11770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:28.933646', 'step': 11770, 'epoch': 2} {'type': 'loss', 'content': 0.025657610967755318, 'timestamp': '2025-09-10 02:49:28.935802', 'step': 11771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:28.991418', 'step': 11771, 'epoch': 2} {'type': 'loss', 'content': 0.1164819523692131, 'timestamp': '2025-09-10 02:49:28.997245', 'step': 11772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:29.050748', 'step': 11772, 'epoch': 2} {'type': 'loss', 'content': 0.09738842397928238, 'timestamp': '2025-09-10 02:49:29.052900', 'step': 11773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:29.107276', 'step': 11773, 'epoch': 2} {'type': 'loss', 'content': 0.1570979654788971, 'timestamp': '2025-09-10 02:49:29.109462', 'step': 11774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:29.163803', 'step': 11774, 'epoch': 2} {'type': 'loss', 'content': 0.19683007895946503, 'timestamp': '2025-09-10 02:49:29.165817', 'step': 11775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:29.219951', 'step': 11775, 'epoch': 2} {'type': 'loss', 'content': 0.09335587918758392, 'timestamp': '2025-09-10 02:49:29.225729', 'step': 11776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:29.279262', 'step': 11776, 'epoch': 2} {'type': 'loss', 'content': 0.2330332100391388, 'timestamp': '2025-09-10 02:49:29.281511', 'step': 11777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:29.335441', 'step': 11777, 'epoch': 2} {'type': 'loss', 'content': 0.07674497365951538, 'timestamp': '2025-09-10 02:49:29.337544', 'step': 11778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:29.392655', 'step': 11778, 'epoch': 2} {'type': 'loss', 'content': 0.09986562281847, 'timestamp': '2025-09-10 02:49:29.394586', 'step': 11779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:29.448932', 'step': 11779, 'epoch': 2} {'type': 'loss', 'content': 0.14546382427215576, 'timestamp': '2025-09-10 02:49:29.454555', 'step': 11780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:29.508231', 'step': 11780, 'epoch': 2} {'type': 'loss', 'content': 0.09008367359638214, 'timestamp': '2025-09-10 02:49:29.510538', 'step': 11781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:29.563656', 'step': 11781, 'epoch': 2} {'type': 'loss', 'content': 0.138640359044075, 'timestamp': '2025-09-10 02:49:29.565974', 'step': 11782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:29.621740', 'step': 11782, 'epoch': 2} {'type': 'loss', 'content': 0.1522255390882492, 'timestamp': '2025-09-10 02:49:29.623960', 'step': 11783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:29.680008', 'step': 11783, 'epoch': 2} {'type': 'loss', 'content': 0.1468876749277115, 'timestamp': '2025-09-10 02:49:29.686249', 'step': 11784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:29.740391', 'step': 11784, 'epoch': 2} {'type': 'loss', 'content': 0.0951770469546318, 'timestamp': '2025-09-10 02:49:29.742747', 'step': 11785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:29.798288', 'step': 11785, 'epoch': 2} {'type': 'loss', 'content': 0.11057516187429428, 'timestamp': '2025-09-10 02:49:29.800726', 'step': 11786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:29.855003', 'step': 11786, 'epoch': 2} {'type': 'loss', 'content': 0.1985313445329666, 'timestamp': '2025-09-10 02:49:29.856908', 'step': 11787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:29.911378', 'step': 11787, 'epoch': 2} {'type': 'loss', 'content': 0.11251656711101532, 'timestamp': '2025-09-10 02:49:29.917436', 'step': 11788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:29.970617', 'step': 11788, 'epoch': 2} {'type': 'loss', 'content': 0.11883281171321869, 'timestamp': '2025-09-10 02:49:29.972535', 'step': 11789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:30.026515', 'step': 11789, 'epoch': 2} {'type': 'loss', 'content': 0.1798156052827835, 'timestamp': '2025-09-10 02:49:30.028452', 'step': 11790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:30.083266', 'step': 11790, 'epoch': 2} {'type': 'loss', 'content': 0.16460618376731873, 'timestamp': '2025-09-10 02:49:30.085640', 'step': 11791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:30.141874', 'step': 11791, 'epoch': 2} {'type': 'loss', 'content': 0.08682204782962799, 'timestamp': '2025-09-10 02:49:30.148112', 'step': 11792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:30.202468', 'step': 11792, 'epoch': 2} {'type': 'loss', 'content': 0.09855648875236511, 'timestamp': '2025-09-10 02:49:30.204709', 'step': 11793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:30.266855', 'step': 11793, 'epoch': 2} {'type': 'loss', 'content': 0.05914636328816414, 'timestamp': '2025-09-10 02:49:30.268914', 'step': 11794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:30.326920', 'step': 11794, 'epoch': 2} {'type': 'loss', 'content': 0.10346877574920654, 'timestamp': '2025-09-10 02:49:30.329121', 'step': 11795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:30.390270', 'step': 11795, 'epoch': 2} {'type': 'loss', 'content': 0.14076277613639832, 'timestamp': '2025-09-10 02:49:30.396105', 'step': 11796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:30.449203', 'step': 11796, 'epoch': 2} {'type': 'loss', 'content': 0.0951838418841362, 'timestamp': '2025-09-10 02:49:30.451105', 'step': 11797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:30.509343', 'step': 11797, 'epoch': 2} {'type': 'loss', 'content': 0.11216828227043152, 'timestamp': '2025-09-10 02:49:30.511481', 'step': 11798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:30.564395', 'step': 11798, 'epoch': 2} {'type': 'loss', 'content': 0.17289236187934875, 'timestamp': '2025-09-10 02:49:30.568551', 'step': 11799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:30.622621', 'step': 11799, 'epoch': 2} {'type': 'loss', 'content': 0.10648105293512344, 'timestamp': '2025-09-10 02:49:30.629913', 'step': 11800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:30.684996', 'step': 11800, 'epoch': 2} {'type': 'loss', 'content': 0.11430903524160385, 'timestamp': '2025-09-10 02:49:30.686720', 'step': 11801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:30.742448', 'step': 11801, 'epoch': 2} {'type': 'loss', 'content': 0.13945133984088898, 'timestamp': '2025-09-10 02:49:30.744502', 'step': 11802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:30.799923', 'step': 11802, 'epoch': 2} {'type': 'loss', 'content': 0.11846506595611572, 'timestamp': '2025-09-10 02:49:30.802075', 'step': 11803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:30.855703', 'step': 11803, 'epoch': 2} {'type': 'loss', 'content': 0.15021948516368866, 'timestamp': '2025-09-10 02:49:30.863358', 'step': 11804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:30.919317', 'step': 11804, 'epoch': 2} {'type': 'loss', 'content': 0.06781850755214691, 'timestamp': '2025-09-10 02:49:30.921358', 'step': 11805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:30.974655', 'step': 11805, 'epoch': 2} {'type': 'loss', 'content': 0.13955645263195038, 'timestamp': '2025-09-10 02:49:30.981766', 'step': 11806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:31.040670', 'step': 11806, 'epoch': 2} {'type': 'loss', 'content': 0.14260901510715485, 'timestamp': '2025-09-10 02:49:31.044700', 'step': 11807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:31.105331', 'step': 11807, 'epoch': 2} {'type': 'loss', 'content': 0.14513157308101654, 'timestamp': '2025-09-10 02:49:31.111396', 'step': 11808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:31.165877', 'step': 11808, 'epoch': 2} {'type': 'loss', 'content': 0.11101536452770233, 'timestamp': '2025-09-10 02:49:31.167830', 'step': 11809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:31.222663', 'step': 11809, 'epoch': 2} {'type': 'loss', 'content': 0.18613141775131226, 'timestamp': '2025-09-10 02:49:31.224751', 'step': 11810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:31.278374', 'step': 11810, 'epoch': 2} {'type': 'loss', 'content': 0.1111476719379425, 'timestamp': '2025-09-10 02:49:31.280350', 'step': 11811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:31.333785', 'step': 11811, 'epoch': 2} {'type': 'loss', 'content': 0.13783222436904907, 'timestamp': '2025-09-10 02:49:31.340069', 'step': 11812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:31.392606', 'step': 11812, 'epoch': 2} {'type': 'loss', 'content': 0.08770827949047089, 'timestamp': '2025-09-10 02:49:31.394762', 'step': 11813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:31.448305', 'step': 11813, 'epoch': 2} {'type': 'loss', 'content': 0.1288103610277176, 'timestamp': '2025-09-10 02:49:31.450544', 'step': 11814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:31.503973', 'step': 11814, 'epoch': 2} {'type': 'loss', 'content': 0.19564050436019897, 'timestamp': '2025-09-10 02:49:31.506176', 'step': 11815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:31.559716', 'step': 11815, 'epoch': 2} {'type': 'loss', 'content': 0.05345439538359642, 'timestamp': '2025-09-10 02:49:31.565755', 'step': 11816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:31.619226', 'step': 11816, 'epoch': 2} {'type': 'loss', 'content': 0.20184342563152313, 'timestamp': '2025-09-10 02:49:31.621174', 'step': 11817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:31.675097', 'step': 11817, 'epoch': 2} {'type': 'loss', 'content': 0.13206954300403595, 'timestamp': '2025-09-10 02:49:31.677196', 'step': 11818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:31.730893', 'step': 11818, 'epoch': 2} {'type': 'loss', 'content': 0.217452272772789, 'timestamp': '2025-09-10 02:49:31.733046', 'step': 11819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:31.786468', 'step': 11819, 'epoch': 2} {'type': 'loss', 'content': 0.03931768983602524, 'timestamp': '2025-09-10 02:49:31.792558', 'step': 11820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:31.846435', 'step': 11820, 'epoch': 2} {'type': 'loss', 'content': 0.032426249235868454, 'timestamp': '2025-09-10 02:49:31.848857', 'step': 11821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:31.902844', 'step': 11821, 'epoch': 2} {'type': 'loss', 'content': 0.17107680439949036, 'timestamp': '2025-09-10 02:49:31.905702', 'step': 11822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:31.960026', 'step': 11822, 'epoch': 2} {'type': 'loss', 'content': 0.06583940982818604, 'timestamp': '2025-09-10 02:49:31.962202', 'step': 11823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:32.016863', 'step': 11823, 'epoch': 2} {'type': 'loss', 'content': 0.121538445353508, 'timestamp': '2025-09-10 02:49:32.022823', 'step': 11824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:32.076187', 'step': 11824, 'epoch': 2} {'type': 'loss', 'content': 0.11544796824455261, 'timestamp': '2025-09-10 02:49:32.078302', 'step': 11825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:32.131906', 'step': 11825, 'epoch': 2} {'type': 'loss', 'content': 0.1331336498260498, 'timestamp': '2025-09-10 02:49:32.134114', 'step': 11826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:32.189851', 'step': 11826, 'epoch': 2} {'type': 'loss', 'content': 0.11333096027374268, 'timestamp': '2025-09-10 02:49:32.192096', 'step': 11827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:32.246455', 'step': 11827, 'epoch': 2} {'type': 'loss', 'content': 0.2031240463256836, 'timestamp': '2025-09-10 02:49:32.252610', 'step': 11828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:32.306385', 'step': 11828, 'epoch': 2} {'type': 'loss', 'content': 0.09231344610452652, 'timestamp': '2025-09-10 02:49:32.308361', 'step': 11829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:32.363144', 'step': 11829, 'epoch': 2} {'type': 'loss', 'content': 0.2642286717891693, 'timestamp': '2025-09-10 02:49:32.365395', 'step': 11830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:32.422370', 'step': 11830, 'epoch': 2} {'type': 'loss', 'content': 0.1315820962190628, 'timestamp': '2025-09-10 02:49:32.424578', 'step': 11831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:32.479630', 'step': 11831, 'epoch': 2} {'type': 'loss', 'content': 0.17854957282543182, 'timestamp': '2025-09-10 02:49:32.485801', 'step': 11832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:32.539879', 'step': 11832, 'epoch': 2} {'type': 'loss', 'content': 0.13122403621673584, 'timestamp': '2025-09-10 02:49:32.541799', 'step': 11833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:32.595356', 'step': 11833, 'epoch': 2} {'type': 'loss', 'content': 0.22785818576812744, 'timestamp': '2025-09-10 02:49:32.597661', 'step': 11834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:32.652357', 'step': 11834, 'epoch': 2} {'type': 'loss', 'content': 0.2182299941778183, 'timestamp': '2025-09-10 02:49:32.654676', 'step': 11835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:32.709111', 'step': 11835, 'epoch': 2} {'type': 'loss', 'content': 0.09747082740068436, 'timestamp': '2025-09-10 02:49:32.715283', 'step': 11836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:32.769619', 'step': 11836, 'epoch': 2} {'type': 'loss', 'content': 0.07009301334619522, 'timestamp': '2025-09-10 02:49:32.771637', 'step': 11837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:32.825720', 'step': 11837, 'epoch': 2} {'type': 'loss', 'content': 0.09655115753412247, 'timestamp': '2025-09-10 02:49:32.827808', 'step': 11838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:32.882306', 'step': 11838, 'epoch': 2} {'type': 'loss', 'content': 0.16016463935375214, 'timestamp': '2025-09-10 02:49:32.884485', 'step': 11839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:32.938645', 'step': 11839, 'epoch': 2} {'type': 'loss', 'content': 0.11848007887601852, 'timestamp': '2025-09-10 02:49:32.944533', 'step': 11840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:32.997843', 'step': 11840, 'epoch': 2} {'type': 'loss', 'content': 0.1883459836244583, 'timestamp': '2025-09-10 02:49:33.000007', 'step': 11841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:33.054430', 'step': 11841, 'epoch': 2} {'type': 'loss', 'content': 0.20556271076202393, 'timestamp': '2025-09-10 02:49:33.056683', 'step': 11842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:33.110835', 'step': 11842, 'epoch': 2} {'type': 'loss', 'content': 0.10314828157424927, 'timestamp': '2025-09-10 02:49:33.112911', 'step': 11843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:33.168127', 'step': 11843, 'epoch': 2} {'type': 'loss', 'content': 0.04915776476264, 'timestamp': '2025-09-10 02:49:33.174333', 'step': 11844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:33.227774', 'step': 11844, 'epoch': 2} {'type': 'loss', 'content': 0.1498253345489502, 'timestamp': '2025-09-10 02:49:33.229885', 'step': 11845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:33.283609', 'step': 11845, 'epoch': 2} {'type': 'loss', 'content': 0.14094042778015137, 'timestamp': '2025-09-10 02:49:33.285593', 'step': 11846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:33.341481', 'step': 11846, 'epoch': 2} {'type': 'loss', 'content': 0.15695089101791382, 'timestamp': '2025-09-10 02:49:33.343580', 'step': 11847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:33.397947', 'step': 11847, 'epoch': 2} {'type': 'loss', 'content': 0.02896314673125744, 'timestamp': '2025-09-10 02:49:33.404295', 'step': 11848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:33.458962', 'step': 11848, 'epoch': 2} {'type': 'loss', 'content': 0.12157601118087769, 'timestamp': '2025-09-10 02:49:33.461454', 'step': 11849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:33.515578', 'step': 11849, 'epoch': 2} {'type': 'loss', 'content': 0.0962732583284378, 'timestamp': '2025-09-10 02:49:33.517824', 'step': 11850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:33.571356', 'step': 11850, 'epoch': 2} {'type': 'loss', 'content': 0.11653277277946472, 'timestamp': '2025-09-10 02:49:33.573556', 'step': 11851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:33.626731', 'step': 11851, 'epoch': 2} {'type': 'loss', 'content': 0.1237003430724144, 'timestamp': '2025-09-10 02:49:33.632930', 'step': 11852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:33.685952', 'step': 11852, 'epoch': 2} {'type': 'loss', 'content': 0.05637168884277344, 'timestamp': '2025-09-10 02:49:33.687858', 'step': 11853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:33.746686', 'step': 11853, 'epoch': 2} {'type': 'loss', 'content': 0.12635695934295654, 'timestamp': '2025-09-10 02:49:33.748830', 'step': 11854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:33.802620', 'step': 11854, 'epoch': 2} {'type': 'loss', 'content': 0.12253160029649734, 'timestamp': '2025-09-10 02:49:33.804773', 'step': 11855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:33.858482', 'step': 11855, 'epoch': 2} {'type': 'loss', 'content': 0.19112364947795868, 'timestamp': '2025-09-10 02:49:33.864749', 'step': 11856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:33.918656', 'step': 11856, 'epoch': 2} {'type': 'loss', 'content': 0.12310633063316345, 'timestamp': '2025-09-10 02:49:33.923604', 'step': 11857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:33.978724', 'step': 11857, 'epoch': 2} {'type': 'loss', 'content': 0.12465514987707138, 'timestamp': '2025-09-10 02:49:33.981116', 'step': 11858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:34.034610', 'step': 11858, 'epoch': 2} {'type': 'loss', 'content': 0.12805749475955963, 'timestamp': '2025-09-10 02:49:34.036811', 'step': 11859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:34.093205', 'step': 11859, 'epoch': 2} {'type': 'loss', 'content': 0.13310691714286804, 'timestamp': '2025-09-10 02:49:34.099458', 'step': 11860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:34.151871', 'step': 11860, 'epoch': 2} {'type': 'loss', 'content': 0.11158321052789688, 'timestamp': '2025-09-10 02:49:34.153918', 'step': 11861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:34.206534', 'step': 11861, 'epoch': 2} {'type': 'loss', 'content': 0.22673876583576202, 'timestamp': '2025-09-10 02:49:34.208599', 'step': 11862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:34.261997', 'step': 11862, 'epoch': 2} {'type': 'loss', 'content': 0.18183369934558868, 'timestamp': '2025-09-10 02:49:34.264311', 'step': 11863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:34.317498', 'step': 11863, 'epoch': 2} {'type': 'loss', 'content': 0.11675700545310974, 'timestamp': '2025-09-10 02:49:34.323614', 'step': 11864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:34.376232', 'step': 11864, 'epoch': 2} {'type': 'loss', 'content': 0.05494965240359306, 'timestamp': '2025-09-10 02:49:34.378445', 'step': 11865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:34.431054', 'step': 11865, 'epoch': 2} {'type': 'loss', 'content': 0.1293235570192337, 'timestamp': '2025-09-10 02:49:34.433061', 'step': 11866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:34.486529', 'step': 11866, 'epoch': 2} {'type': 'loss', 'content': 0.07119592279195786, 'timestamp': '2025-09-10 02:49:34.488773', 'step': 11867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:34.542924', 'step': 11867, 'epoch': 2} {'type': 'loss', 'content': 0.13302873075008392, 'timestamp': '2025-09-10 02:49:34.548817', 'step': 11868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:34.601131', 'step': 11868, 'epoch': 2} {'type': 'loss', 'content': 0.11359380185604095, 'timestamp': '2025-09-10 02:49:34.603287', 'step': 11869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:34.656650', 'step': 11869, 'epoch': 2} {'type': 'loss', 'content': 0.07567417621612549, 'timestamp': '2025-09-10 02:49:34.658842', 'step': 11870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:34.711938', 'step': 11870, 'epoch': 2} {'type': 'loss', 'content': 0.18341918289661407, 'timestamp': '2025-09-10 02:49:34.714148', 'step': 11871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:34.769250', 'step': 11871, 'epoch': 2} {'type': 'loss', 'content': 0.10327833890914917, 'timestamp': '2025-09-10 02:49:34.775082', 'step': 11872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:34.827900', 'step': 11872, 'epoch': 2} {'type': 'loss', 'content': 0.1546471118927002, 'timestamp': '2025-09-10 02:49:34.830135', 'step': 11873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:34.885745', 'step': 11873, 'epoch': 2} {'type': 'loss', 'content': 0.1672157645225525, 'timestamp': '2025-09-10 02:49:34.887875', 'step': 11874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:34.942004', 'step': 11874, 'epoch': 2} {'type': 'loss', 'content': 0.03496062383055687, 'timestamp': '2025-09-10 02:49:34.944287', 'step': 11875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:34.998812', 'step': 11875, 'epoch': 2} {'type': 'loss', 'content': 0.13152316212654114, 'timestamp': '2025-09-10 02:49:35.004845', 'step': 11876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:35.058434', 'step': 11876, 'epoch': 2} {'type': 'loss', 'content': 0.08456353843212128, 'timestamp': '2025-09-10 02:49:35.060724', 'step': 11877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:35.113966', 'step': 11877, 'epoch': 2} {'type': 'loss', 'content': 0.14944638311862946, 'timestamp': '2025-09-10 02:49:35.116237', 'step': 11878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:35.175485', 'step': 11878, 'epoch': 2} {'type': 'loss', 'content': 0.12985756993293762, 'timestamp': '2025-09-10 02:49:35.178041', 'step': 11879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:35.231442', 'step': 11879, 'epoch': 2} {'type': 'loss', 'content': 0.1628248393535614, 'timestamp': '2025-09-10 02:49:35.237283', 'step': 11880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:35.289650', 'step': 11880, 'epoch': 2} {'type': 'loss', 'content': 0.1442936211824417, 'timestamp': '2025-09-10 02:49:35.298787', 'step': 11881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:35.355933', 'step': 11881, 'epoch': 2} {'type': 'loss', 'content': 0.19697242975234985, 'timestamp': '2025-09-10 02:49:35.357962', 'step': 11882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:35.415981', 'step': 11882, 'epoch': 2} {'type': 'loss', 'content': 0.17488698661327362, 'timestamp': '2025-09-10 02:49:35.417899', 'step': 11883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:35.473027', 'step': 11883, 'epoch': 2} {'type': 'loss', 'content': 0.14143255352973938, 'timestamp': '2025-09-10 02:49:35.478697', 'step': 11884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:35.532866', 'step': 11884, 'epoch': 2} {'type': 'loss', 'content': 0.15547175705432892, 'timestamp': '2025-09-10 02:49:35.535083', 'step': 11885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:35.591243', 'step': 11885, 'epoch': 2} {'type': 'loss', 'content': 0.17107951641082764, 'timestamp': '2025-09-10 02:49:35.593394', 'step': 11886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:35.652794', 'step': 11886, 'epoch': 2} {'type': 'loss', 'content': 0.13871780037879944, 'timestamp': '2025-09-10 02:49:35.654998', 'step': 11887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:35.710461', 'step': 11887, 'epoch': 2} {'type': 'loss', 'content': 0.06241687387228012, 'timestamp': '2025-09-10 02:49:35.720882', 'step': 11888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:35.779561', 'step': 11888, 'epoch': 2} {'type': 'loss', 'content': 0.16406583786010742, 'timestamp': '2025-09-10 02:49:35.781629', 'step': 11889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:35.835523', 'step': 11889, 'epoch': 2} {'type': 'loss', 'content': 0.11995997279882431, 'timestamp': '2025-09-10 02:49:35.837679', 'step': 11890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:35.891332', 'step': 11890, 'epoch': 2} {'type': 'loss', 'content': 0.1270214468240738, 'timestamp': '2025-09-10 02:49:35.893504', 'step': 11891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:35.948053', 'step': 11891, 'epoch': 2} {'type': 'loss', 'content': 0.1948849856853485, 'timestamp': '2025-09-10 02:49:35.954494', 'step': 11892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:36.008599', 'step': 11892, 'epoch': 2} {'type': 'loss', 'content': 0.1140456572175026, 'timestamp': '2025-09-10 02:49:36.011033', 'step': 11893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:36.066373', 'step': 11893, 'epoch': 2} {'type': 'loss', 'content': 0.13846959173679352, 'timestamp': '2025-09-10 02:49:36.072582', 'step': 11894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:36.126382', 'step': 11894, 'epoch': 2} {'type': 'loss', 'content': 0.24997350573539734, 'timestamp': '2025-09-10 02:49:36.128459', 'step': 11895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:36.182784', 'step': 11895, 'epoch': 2} {'type': 'loss', 'content': 0.05002019181847572, 'timestamp': '2025-09-10 02:49:36.188827', 'step': 11896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:36.242017', 'step': 11896, 'epoch': 2} {'type': 'loss', 'content': 0.2058185487985611, 'timestamp': '2025-09-10 02:49:36.245071', 'step': 11897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:36.301136', 'step': 11897, 'epoch': 2} {'type': 'loss', 'content': 0.08920469880104065, 'timestamp': '2025-09-10 02:49:36.303322', 'step': 11898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:36.356452', 'step': 11898, 'epoch': 2} {'type': 'loss', 'content': 0.11518922448158264, 'timestamp': '2025-09-10 02:49:36.360104', 'step': 11899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:36.413771', 'step': 11899, 'epoch': 2} {'type': 'loss', 'content': 0.1043810024857521, 'timestamp': '2025-09-10 02:49:36.420009', 'step': 11900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:36.474971', 'step': 11900, 'epoch': 2} {'type': 'loss', 'content': 0.2801891565322876, 'timestamp': '2025-09-10 02:49:36.477161', 'step': 11901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:36.531683', 'step': 11901, 'epoch': 2} {'type': 'loss', 'content': 0.13481000065803528, 'timestamp': '2025-09-10 02:49:36.533584', 'step': 11902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:36.588210', 'step': 11902, 'epoch': 2} {'type': 'loss', 'content': 0.13415242731571198, 'timestamp': '2025-09-10 02:49:36.590575', 'step': 11903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:36.644767', 'step': 11903, 'epoch': 2} {'type': 'loss', 'content': 0.1943865865468979, 'timestamp': '2025-09-10 02:49:36.650747', 'step': 11904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:36.704733', 'step': 11904, 'epoch': 2} {'type': 'loss', 'content': 0.14032234251499176, 'timestamp': '2025-09-10 02:49:36.706839', 'step': 11905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:36.760161', 'step': 11905, 'epoch': 2} {'type': 'loss', 'content': 0.1126989796757698, 'timestamp': '2025-09-10 02:49:36.764943', 'step': 11906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:36.822737', 'step': 11906, 'epoch': 2} {'type': 'loss', 'content': 0.07905549556016922, 'timestamp': '2025-09-10 02:49:36.825167', 'step': 11907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:36.879111', 'step': 11907, 'epoch': 2} {'type': 'loss', 'content': 0.12367981672286987, 'timestamp': '2025-09-10 02:49:36.885082', 'step': 11908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:36.937834', 'step': 11908, 'epoch': 2} {'type': 'loss', 'content': 0.06674955785274506, 'timestamp': '2025-09-10 02:49:36.940150', 'step': 11909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:36.996265', 'step': 11909, 'epoch': 2} {'type': 'loss', 'content': 0.20865091681480408, 'timestamp': '2025-09-10 02:49:36.998346', 'step': 11910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:37.052618', 'step': 11910, 'epoch': 2} {'type': 'loss', 'content': 0.16309748589992523, 'timestamp': '2025-09-10 02:49:37.054845', 'step': 11911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:37.114798', 'step': 11911, 'epoch': 2} {'type': 'loss', 'content': 0.1116044893860817, 'timestamp': '2025-09-10 02:49:37.126909', 'step': 11912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:37.190954', 'step': 11912, 'epoch': 2} {'type': 'loss', 'content': 0.11198648065328598, 'timestamp': '2025-09-10 02:49:37.193598', 'step': 11913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:37.251584', 'step': 11913, 'epoch': 2} {'type': 'loss', 'content': 0.13779670000076294, 'timestamp': '2025-09-10 02:49:37.253903', 'step': 11914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:37.310368', 'step': 11914, 'epoch': 2} {'type': 'loss', 'content': 0.24447260797023773, 'timestamp': '2025-09-10 02:49:37.312894', 'step': 11915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:37.368558', 'step': 11915, 'epoch': 2} {'type': 'loss', 'content': 0.09052800387144089, 'timestamp': '2025-09-10 02:49:37.374588', 'step': 11916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:37.428393', 'step': 11916, 'epoch': 2} {'type': 'loss', 'content': 0.07111146301031113, 'timestamp': '2025-09-10 02:49:37.430725', 'step': 11917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:37.487198', 'step': 11917, 'epoch': 2} {'type': 'loss', 'content': 0.11325855553150177, 'timestamp': '2025-09-10 02:49:37.489347', 'step': 11918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:37.544597', 'step': 11918, 'epoch': 2} {'type': 'loss', 'content': 0.08087126165628433, 'timestamp': '2025-09-10 02:49:37.546846', 'step': 11919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:37.602159', 'step': 11919, 'epoch': 2} {'type': 'loss', 'content': 0.12702007591724396, 'timestamp': '2025-09-10 02:49:37.607993', 'step': 11920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:37.665783', 'step': 11920, 'epoch': 2} {'type': 'loss', 'content': 0.12517650425434113, 'timestamp': '2025-09-10 02:49:37.668068', 'step': 11921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:37.724492', 'step': 11921, 'epoch': 2} {'type': 'loss', 'content': 0.08382078260183334, 'timestamp': '2025-09-10 02:49:37.726809', 'step': 11922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:37.780939', 'step': 11922, 'epoch': 2} {'type': 'loss', 'content': 0.15861356258392334, 'timestamp': '2025-09-10 02:49:37.783087', 'step': 11923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:37.837462', 'step': 11923, 'epoch': 2} {'type': 'loss', 'content': 0.1677677035331726, 'timestamp': '2025-09-10 02:49:37.843536', 'step': 11924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:37.896820', 'step': 11924, 'epoch': 2} {'type': 'loss', 'content': 0.0897098183631897, 'timestamp': '2025-09-10 02:49:37.899534', 'step': 11925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:37.955986', 'step': 11925, 'epoch': 2} {'type': 'loss', 'content': 0.15750445425510406, 'timestamp': '2025-09-10 02:49:37.958166', 'step': 11926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:38.011867', 'step': 11926, 'epoch': 2} {'type': 'loss', 'content': 0.13306619226932526, 'timestamp': '2025-09-10 02:49:38.014092', 'step': 11927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:38.067985', 'step': 11927, 'epoch': 2} {'type': 'loss', 'content': 0.11919575929641724, 'timestamp': '2025-09-10 02:49:38.073792', 'step': 11928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:38.126804', 'step': 11928, 'epoch': 2} {'type': 'loss', 'content': 0.05408896505832672, 'timestamp': '2025-09-10 02:49:38.128937', 'step': 11929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:38.182438', 'step': 11929, 'epoch': 2} {'type': 'loss', 'content': 0.10606610774993896, 'timestamp': '2025-09-10 02:49:38.184734', 'step': 11930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:38.238528', 'step': 11930, 'epoch': 2} {'type': 'loss', 'content': 0.1657693237066269, 'timestamp': '2025-09-10 02:49:38.240893', 'step': 11931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:38.294685', 'step': 11931, 'epoch': 2} {'type': 'loss', 'content': 0.10149512439966202, 'timestamp': '2025-09-10 02:49:38.300569', 'step': 11932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:38.353996', 'step': 11932, 'epoch': 2} {'type': 'loss', 'content': 0.1867142617702484, 'timestamp': '2025-09-10 02:49:38.356191', 'step': 11933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:38.411067', 'step': 11933, 'epoch': 2} {'type': 'loss', 'content': 0.054075174033641815, 'timestamp': '2025-09-10 02:49:38.413229', 'step': 11934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:38.468018', 'step': 11934, 'epoch': 2} {'type': 'loss', 'content': 0.14818686246871948, 'timestamp': '2025-09-10 02:49:38.470338', 'step': 11935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:38.524927', 'step': 11935, 'epoch': 2} {'type': 'loss', 'content': 0.09740012884140015, 'timestamp': '2025-09-10 02:49:38.531965', 'step': 11936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:38.585745', 'step': 11936, 'epoch': 2} {'type': 'loss', 'content': 0.10551087558269501, 'timestamp': '2025-09-10 02:49:38.588043', 'step': 11937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:38.642166', 'step': 11937, 'epoch': 2} {'type': 'loss', 'content': 0.09018254280090332, 'timestamp': '2025-09-10 02:49:38.644350', 'step': 11938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:38.698111', 'step': 11938, 'epoch': 2} {'type': 'loss', 'content': 0.057982560247182846, 'timestamp': '2025-09-10 02:49:38.700478', 'step': 11939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:38.754156', 'step': 11939, 'epoch': 2} {'type': 'loss', 'content': 0.14321713149547577, 'timestamp': '2025-09-10 02:49:38.760053', 'step': 11940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:38.813437', 'step': 11940, 'epoch': 2} {'type': 'loss', 'content': 0.12486787885427475, 'timestamp': '2025-09-10 02:49:38.815591', 'step': 11941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:38.872354', 'step': 11941, 'epoch': 2} {'type': 'loss', 'content': 0.10097683221101761, 'timestamp': '2025-09-10 02:49:38.874302', 'step': 11942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:38.927883', 'step': 11942, 'epoch': 2} {'type': 'loss', 'content': 0.0892554372549057, 'timestamp': '2025-09-10 02:49:38.930094', 'step': 11943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:38.984701', 'step': 11943, 'epoch': 2} {'type': 'loss', 'content': 0.14108887314796448, 'timestamp': '2025-09-10 02:49:38.990849', 'step': 11944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:39.044228', 'step': 11944, 'epoch': 2} {'type': 'loss', 'content': 0.051644258201122284, 'timestamp': '2025-09-10 02:49:39.046329', 'step': 11945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:39.100149', 'step': 11945, 'epoch': 2} {'type': 'loss', 'content': 0.15897174179553986, 'timestamp': '2025-09-10 02:49:39.102115', 'step': 11946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:39.155853', 'step': 11946, 'epoch': 2} {'type': 'loss', 'content': 0.14479775726795197, 'timestamp': '2025-09-10 02:49:39.157933', 'step': 11947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:39.211466', 'step': 11947, 'epoch': 2} {'type': 'loss', 'content': 0.078857421875, 'timestamp': '2025-09-10 02:49:39.217384', 'step': 11948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:39.270387', 'step': 11948, 'epoch': 2} {'type': 'loss', 'content': 0.08839548379182816, 'timestamp': '2025-09-10 02:49:39.272592', 'step': 11949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:39.326449', 'step': 11949, 'epoch': 2} {'type': 'loss', 'content': 0.09534440189599991, 'timestamp': '2025-09-10 02:49:39.329556', 'step': 11950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:39.384322', 'step': 11950, 'epoch': 2} {'type': 'loss', 'content': 0.11127576231956482, 'timestamp': '2025-09-10 02:49:39.386291', 'step': 11951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:39.442453', 'step': 11951, 'epoch': 2} {'type': 'loss', 'content': 0.09383277595043182, 'timestamp': '2025-09-10 02:49:39.448314', 'step': 11952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:39.502299', 'step': 11952, 'epoch': 2} {'type': 'loss', 'content': 0.127984419465065, 'timestamp': '2025-09-10 02:49:39.504367', 'step': 11953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:39.558105', 'step': 11953, 'epoch': 2} {'type': 'loss', 'content': 0.055991366505622864, 'timestamp': '2025-09-10 02:49:39.560287', 'step': 11954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:39.614070', 'step': 11954, 'epoch': 2} {'type': 'loss', 'content': 0.08262907713651657, 'timestamp': '2025-09-10 02:49:39.616319', 'step': 11955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:39.669808', 'step': 11955, 'epoch': 2} {'type': 'loss', 'content': 0.10647180676460266, 'timestamp': '2025-09-10 02:49:39.675896', 'step': 11956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:39.728677', 'step': 11956, 'epoch': 2} {'type': 'loss', 'content': 0.23162946105003357, 'timestamp': '2025-09-10 02:49:39.730783', 'step': 11957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:39.783674', 'step': 11957, 'epoch': 2} {'type': 'loss', 'content': 0.12900297343730927, 'timestamp': '2025-09-10 02:49:39.785613', 'step': 11958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:39.839704', 'step': 11958, 'epoch': 2} {'type': 'loss', 'content': 0.20471027493476868, 'timestamp': '2025-09-10 02:49:39.841844', 'step': 11959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:39.896440', 'step': 11959, 'epoch': 2} {'type': 'loss', 'content': 0.06901177018880844, 'timestamp': '2025-09-10 02:49:39.902307', 'step': 11960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:39.955408', 'step': 11960, 'epoch': 2} {'type': 'loss', 'content': 0.13257357478141785, 'timestamp': '2025-09-10 02:49:39.957569', 'step': 11961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:40.011157', 'step': 11961, 'epoch': 2} {'type': 'loss', 'content': 0.06887795031070709, 'timestamp': '2025-09-10 02:49:40.013460', 'step': 11962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:40.067920', 'step': 11962, 'epoch': 2} {'type': 'loss', 'content': 0.20594312250614166, 'timestamp': '2025-09-10 02:49:40.070110', 'step': 11963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:40.124367', 'step': 11963, 'epoch': 2} {'type': 'loss', 'content': 0.13387680053710938, 'timestamp': '2025-09-10 02:49:40.130475', 'step': 11964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:40.185102', 'step': 11964, 'epoch': 2} {'type': 'loss', 'content': 0.18367910385131836, 'timestamp': '2025-09-10 02:49:40.187741', 'step': 11965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:40.241105', 'step': 11965, 'epoch': 2} {'type': 'loss', 'content': 0.10228077322244644, 'timestamp': '2025-09-10 02:49:40.243539', 'step': 11966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:40.296910', 'step': 11966, 'epoch': 2} {'type': 'loss', 'content': 0.21603281795978546, 'timestamp': '2025-09-10 02:49:40.299004', 'step': 11967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:40.352397', 'step': 11967, 'epoch': 2} {'type': 'loss', 'content': 0.15372879803180695, 'timestamp': '2025-09-10 02:49:40.358279', 'step': 11968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:40.410931', 'step': 11968, 'epoch': 2} {'type': 'loss', 'content': 0.1180029958486557, 'timestamp': '2025-09-10 02:49:40.413076', 'step': 11969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:40.466773', 'step': 11969, 'epoch': 2} {'type': 'loss', 'content': 0.14833205938339233, 'timestamp': '2025-09-10 02:49:40.468864', 'step': 11970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:40.522275', 'step': 11970, 'epoch': 2} {'type': 'loss', 'content': 0.12494475394487381, 'timestamp': '2025-09-10 02:49:40.524376', 'step': 11971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:40.580024', 'step': 11971, 'epoch': 2} {'type': 'loss', 'content': 0.129919171333313, 'timestamp': '2025-09-10 02:49:40.586024', 'step': 11972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:40.640026', 'step': 11972, 'epoch': 2} {'type': 'loss', 'content': 0.1572495698928833, 'timestamp': '2025-09-10 02:49:40.642398', 'step': 11973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:40.696132', 'step': 11973, 'epoch': 2} {'type': 'loss', 'content': 0.17738980054855347, 'timestamp': '2025-09-10 02:49:40.698304', 'step': 11974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:40.752079', 'step': 11974, 'epoch': 2} {'type': 'loss', 'content': 0.11668998748064041, 'timestamp': '2025-09-10 02:49:40.754363', 'step': 11975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:40.807651', 'step': 11975, 'epoch': 2} {'type': 'loss', 'content': 0.10652139037847519, 'timestamp': '2025-09-10 02:49:40.813646', 'step': 11976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:40.866725', 'step': 11976, 'epoch': 2} {'type': 'loss', 'content': 0.10800566524267197, 'timestamp': '2025-09-10 02:49:40.868923', 'step': 11977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:40.922705', 'step': 11977, 'epoch': 2} {'type': 'loss', 'content': 0.14601753652095795, 'timestamp': '2025-09-10 02:49:40.925134', 'step': 11978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:40.979788', 'step': 11978, 'epoch': 2} {'type': 'loss', 'content': 0.12470574676990509, 'timestamp': '2025-09-10 02:49:40.982110', 'step': 11979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:41.035331', 'step': 11979, 'epoch': 2} {'type': 'loss', 'content': 0.12847723066806793, 'timestamp': '2025-09-10 02:49:41.041003', 'step': 11980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:41.094062', 'step': 11980, 'epoch': 2} {'type': 'loss', 'content': 0.1273774355649948, 'timestamp': '2025-09-10 02:49:41.096234', 'step': 11981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:41.149917', 'step': 11981, 'epoch': 2} {'type': 'loss', 'content': 0.16574174165725708, 'timestamp': '2025-09-10 02:49:41.152080', 'step': 11982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:41.205700', 'step': 11982, 'epoch': 2} {'type': 'loss', 'content': 0.08978526294231415, 'timestamp': '2025-09-10 02:49:41.207986', 'step': 11983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:41.261680', 'step': 11983, 'epoch': 2} {'type': 'loss', 'content': 0.08799292892217636, 'timestamp': '2025-09-10 02:49:41.267809', 'step': 11984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:41.320361', 'step': 11984, 'epoch': 2} {'type': 'loss', 'content': 0.06612513214349747, 'timestamp': '2025-09-10 02:49:41.322494', 'step': 11985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:41.377416', 'step': 11985, 'epoch': 2} {'type': 'loss', 'content': 0.12008863687515259, 'timestamp': '2025-09-10 02:49:41.379742', 'step': 11986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:41.433126', 'step': 11986, 'epoch': 2} {'type': 'loss', 'content': 0.13564640283584595, 'timestamp': '2025-09-10 02:49:41.435111', 'step': 11987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:41.489624', 'step': 11987, 'epoch': 2} {'type': 'loss', 'content': 0.07530366629362106, 'timestamp': '2025-09-10 02:49:41.495467', 'step': 11988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:41.549248', 'step': 11988, 'epoch': 2} {'type': 'loss', 'content': 0.15340958535671234, 'timestamp': '2025-09-10 02:49:41.551459', 'step': 11989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:41.605207', 'step': 11989, 'epoch': 2} {'type': 'loss', 'content': 0.11273340880870819, 'timestamp': '2025-09-10 02:49:41.607340', 'step': 11990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:41.661800', 'step': 11990, 'epoch': 2} {'type': 'loss', 'content': 0.11456183344125748, 'timestamp': '2025-09-10 02:49:41.663902', 'step': 11991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:41.717841', 'step': 11991, 'epoch': 2} {'type': 'loss', 'content': 0.13508465886116028, 'timestamp': '2025-09-10 02:49:41.723947', 'step': 11992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:41.777478', 'step': 11992, 'epoch': 2} {'type': 'loss', 'content': 0.06361320614814758, 'timestamp': '2025-09-10 02:49:41.779882', 'step': 11993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:41.834056', 'step': 11993, 'epoch': 2} {'type': 'loss', 'content': 0.24826399981975555, 'timestamp': '2025-09-10 02:49:41.836327', 'step': 11994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:41.889952', 'step': 11994, 'epoch': 2} {'type': 'loss', 'content': 0.14228828251361847, 'timestamp': '2025-09-10 02:49:41.892228', 'step': 11995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:41.946111', 'step': 11995, 'epoch': 2} {'type': 'loss', 'content': 0.14439605176448822, 'timestamp': '2025-09-10 02:49:41.952003', 'step': 11996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:42.004934', 'step': 11996, 'epoch': 2} {'type': 'loss', 'content': 0.10747415572404861, 'timestamp': '2025-09-10 02:49:42.007202', 'step': 11997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:42.061461', 'step': 11997, 'epoch': 2} {'type': 'loss', 'content': 0.18036265671253204, 'timestamp': '2025-09-10 02:49:42.063604', 'step': 11998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:42.117833', 'step': 11998, 'epoch': 2} {'type': 'loss', 'content': 0.10903633385896683, 'timestamp': '2025-09-10 02:49:42.119974', 'step': 11999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:42.174322', 'step': 11999, 'epoch': 2} {'type': 'loss', 'content': 0.1353425830602646, 'timestamp': '2025-09-10 02:49:42.180558', 'step': 12000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 12000', 'timestamp': '2025-09-10 02:49:42.625040', 'step': 12000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:42.683217', 'step': 12000, 'epoch': 2} {'type': 'loss', 'content': 0.14703696966171265, 'timestamp': '2025-09-10 02:49:42.685522', 'step': 12001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:42.741641', 'step': 12001, 'epoch': 2} {'type': 'loss', 'content': 0.07034023106098175, 'timestamp': '2025-09-10 02:49:42.743602', 'step': 12002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:42.798409', 'step': 12002, 'epoch': 2} {'type': 'loss', 'content': 0.20336481928825378, 'timestamp': '2025-09-10 02:49:42.800326', 'step': 12003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:42.855225', 'step': 12003, 'epoch': 2} {'type': 'loss', 'content': 0.04233860224485397, 'timestamp': '2025-09-10 02:49:42.861394', 'step': 12004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:42.914928', 'step': 12004, 'epoch': 2} {'type': 'loss', 'content': 0.17917856574058533, 'timestamp': '2025-09-10 02:49:42.917067', 'step': 12005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:42.973556', 'step': 12005, 'epoch': 2} {'type': 'loss', 'content': 0.06605716049671173, 'timestamp': '2025-09-10 02:49:42.976711', 'step': 12006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:43.030868', 'step': 12006, 'epoch': 2} {'type': 'loss', 'content': 0.09711869806051254, 'timestamp': '2025-09-10 02:49:43.033267', 'step': 12007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:43.088286', 'step': 12007, 'epoch': 2} {'type': 'loss', 'content': 0.13846705853939056, 'timestamp': '2025-09-10 02:49:43.094501', 'step': 12008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:43.147731', 'step': 12008, 'epoch': 2} {'type': 'loss', 'content': 0.11010473221540451, 'timestamp': '2025-09-10 02:49:43.149610', 'step': 12009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:43.203395', 'step': 12009, 'epoch': 2} {'type': 'loss', 'content': 0.1593281626701355, 'timestamp': '2025-09-10 02:49:43.205574', 'step': 12010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:43.260871', 'step': 12010, 'epoch': 2} {'type': 'loss', 'content': 0.044711291790008545, 'timestamp': '2025-09-10 02:49:43.263105', 'step': 12011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:43.317325', 'step': 12011, 'epoch': 2} {'type': 'loss', 'content': 0.07962962239980698, 'timestamp': '2025-09-10 02:49:43.323494', 'step': 12012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:43.378546', 'step': 12012, 'epoch': 2} {'type': 'loss', 'content': 0.09808886051177979, 'timestamp': '2025-09-10 02:49:43.380725', 'step': 12013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:43.435137', 'step': 12013, 'epoch': 2} {'type': 'loss', 'content': 0.11586660146713257, 'timestamp': '2025-09-10 02:49:43.437940', 'step': 12014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:43.492188', 'step': 12014, 'epoch': 2} {'type': 'loss', 'content': 0.13870437443256378, 'timestamp': '2025-09-10 02:49:43.494501', 'step': 12015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:43.549282', 'step': 12015, 'epoch': 2} {'type': 'loss', 'content': 0.05703052505850792, 'timestamp': '2025-09-10 02:49:43.555420', 'step': 12016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:43.609071', 'step': 12016, 'epoch': 2} {'type': 'loss', 'content': 0.13067927956581116, 'timestamp': '2025-09-10 02:49:43.610997', 'step': 12017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:43.665205', 'step': 12017, 'epoch': 2} {'type': 'loss', 'content': 0.08215764909982681, 'timestamp': '2025-09-10 02:49:43.667303', 'step': 12018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:43.731315', 'step': 12018, 'epoch': 2} {'type': 'loss', 'content': 0.06232695281505585, 'timestamp': '2025-09-10 02:49:43.733442', 'step': 12019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:43.787674', 'step': 12019, 'epoch': 2} {'type': 'loss', 'content': 0.09301155805587769, 'timestamp': '2025-09-10 02:49:43.794178', 'step': 12020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:43.848935', 'step': 12020, 'epoch': 2} {'type': 'loss', 'content': 0.11566602438688278, 'timestamp': '2025-09-10 02:49:43.850987', 'step': 12021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:43.905273', 'step': 12021, 'epoch': 2} {'type': 'loss', 'content': 0.11523154377937317, 'timestamp': '2025-09-10 02:49:43.907397', 'step': 12022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:43.961654', 'step': 12022, 'epoch': 2} {'type': 'loss', 'content': 0.1060875728726387, 'timestamp': '2025-09-10 02:49:43.963540', 'step': 12023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:44.018093', 'step': 12023, 'epoch': 2} {'type': 'loss', 'content': 0.16047392785549164, 'timestamp': '2025-09-10 02:49:44.024131', 'step': 12024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:44.077270', 'step': 12024, 'epoch': 2} {'type': 'loss', 'content': 0.10098188370466232, 'timestamp': '2025-09-10 02:49:44.079227', 'step': 12025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:44.133306', 'step': 12025, 'epoch': 2} {'type': 'loss', 'content': 0.06716537475585938, 'timestamp': '2025-09-10 02:49:44.135490', 'step': 12026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:44.192180', 'step': 12026, 'epoch': 2} {'type': 'loss', 'content': 0.1738603562116623, 'timestamp': '2025-09-10 02:49:44.194228', 'step': 12027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:44.248398', 'step': 12027, 'epoch': 2} {'type': 'loss', 'content': 0.060200292617082596, 'timestamp': '2025-09-10 02:49:44.254444', 'step': 12028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:44.308866', 'step': 12028, 'epoch': 2} {'type': 'loss', 'content': 0.16932368278503418, 'timestamp': '2025-09-10 02:49:44.311032', 'step': 12029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:44.366765', 'step': 12029, 'epoch': 2} {'type': 'loss', 'content': 0.18175838887691498, 'timestamp': '2025-09-10 02:49:44.369055', 'step': 12030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:44.424333', 'step': 12030, 'epoch': 2} {'type': 'loss', 'content': 0.08116616308689117, 'timestamp': '2025-09-10 02:49:44.426568', 'step': 12031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:44.481437', 'step': 12031, 'epoch': 2} {'type': 'loss', 'content': 0.1208285391330719, 'timestamp': '2025-09-10 02:49:44.487594', 'step': 12032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:44.541538', 'step': 12032, 'epoch': 2} {'type': 'loss', 'content': 0.14771263301372528, 'timestamp': '2025-09-10 02:49:44.543596', 'step': 12033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:44.597085', 'step': 12033, 'epoch': 2} {'type': 'loss', 'content': 0.09800119698047638, 'timestamp': '2025-09-10 02:49:44.599231', 'step': 12034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:44.655099', 'step': 12034, 'epoch': 2} {'type': 'loss', 'content': 0.12451675534248352, 'timestamp': '2025-09-10 02:49:44.657531', 'step': 12035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:44.712432', 'step': 12035, 'epoch': 2} {'type': 'loss', 'content': 0.09038004279136658, 'timestamp': '2025-09-10 02:49:44.719121', 'step': 12036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:44.772876', 'step': 12036, 'epoch': 2} {'type': 'loss', 'content': 0.10886821895837784, 'timestamp': '2025-09-10 02:49:44.775447', 'step': 12037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:44.831173', 'step': 12037, 'epoch': 2} {'type': 'loss', 'content': 0.12339034676551819, 'timestamp': '2025-09-10 02:49:44.833472', 'step': 12038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:44.890330', 'step': 12038, 'epoch': 2} {'type': 'loss', 'content': 0.031935736536979675, 'timestamp': '2025-09-10 02:49:44.892698', 'step': 12039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:44.948356', 'step': 12039, 'epoch': 2} {'type': 'loss', 'content': 0.07662416994571686, 'timestamp': '2025-09-10 02:49:44.954519', 'step': 12040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:45.010911', 'step': 12040, 'epoch': 2} {'type': 'loss', 'content': 0.09760130196809769, 'timestamp': '2025-09-10 02:49:45.013233', 'step': 12041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:45.069857', 'step': 12041, 'epoch': 2} {'type': 'loss', 'content': 0.12725770473480225, 'timestamp': '2025-09-10 02:49:45.072231', 'step': 12042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:45.126877', 'step': 12042, 'epoch': 2} {'type': 'loss', 'content': 0.2231256514787674, 'timestamp': '2025-09-10 02:49:45.129201', 'step': 12043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:45.183852', 'step': 12043, 'epoch': 2} {'type': 'loss', 'content': 0.06573952734470367, 'timestamp': '2025-09-10 02:49:45.189967', 'step': 12044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:45.246624', 'step': 12044, 'epoch': 2} {'type': 'loss', 'content': 0.17641906440258026, 'timestamp': '2025-09-10 02:49:45.248699', 'step': 12045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:45.303952', 'step': 12045, 'epoch': 2} {'type': 'loss', 'content': 0.086522676050663, 'timestamp': '2025-09-10 02:49:45.306360', 'step': 12046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:45.361379', 'step': 12046, 'epoch': 2} {'type': 'loss', 'content': 0.08873970806598663, 'timestamp': '2025-09-10 02:49:45.363800', 'step': 12047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:45.420534', 'step': 12047, 'epoch': 2} {'type': 'loss', 'content': 0.1652315855026245, 'timestamp': '2025-09-10 02:49:45.426788', 'step': 12048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:45.481211', 'step': 12048, 'epoch': 2} {'type': 'loss', 'content': 0.0997292771935463, 'timestamp': '2025-09-10 02:49:45.483700', 'step': 12049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:45.538967', 'step': 12049, 'epoch': 2} {'type': 'loss', 'content': 0.08644629269838333, 'timestamp': '2025-09-10 02:49:45.541151', 'step': 12050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:45.597358', 'step': 12050, 'epoch': 2} {'type': 'loss', 'content': 0.07418458163738251, 'timestamp': '2025-09-10 02:49:45.599660', 'step': 12051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:45.655449', 'step': 12051, 'epoch': 2} {'type': 'loss', 'content': 0.11730586737394333, 'timestamp': '2025-09-10 02:49:45.661800', 'step': 12052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:45.716446', 'step': 12052, 'epoch': 2} {'type': 'loss', 'content': 0.1078740656375885, 'timestamp': '2025-09-10 02:49:45.718780', 'step': 12053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:45.773910', 'step': 12053, 'epoch': 2} {'type': 'loss', 'content': 0.22141316533088684, 'timestamp': '2025-09-10 02:49:45.776093', 'step': 12054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:45.831147', 'step': 12054, 'epoch': 2} {'type': 'loss', 'content': 0.14125755429267883, 'timestamp': '2025-09-10 02:49:45.833341', 'step': 12055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:45.887231', 'step': 12055, 'epoch': 2} {'type': 'loss', 'content': 0.08794789016246796, 'timestamp': '2025-09-10 02:49:45.893430', 'step': 12056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:45.952236', 'step': 12056, 'epoch': 2} {'type': 'loss', 'content': 0.14581641554832458, 'timestamp': '2025-09-10 02:49:45.954221', 'step': 12057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:46.009261', 'step': 12057, 'epoch': 2} {'type': 'loss', 'content': 0.07824742794036865, 'timestamp': '2025-09-10 02:49:46.011576', 'step': 12058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:46.065435', 'step': 12058, 'epoch': 2} {'type': 'loss', 'content': 0.13465282320976257, 'timestamp': '2025-09-10 02:49:46.067494', 'step': 12059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:46.122212', 'step': 12059, 'epoch': 2} {'type': 'loss', 'content': 0.14003805816173553, 'timestamp': '2025-09-10 02:49:46.128560', 'step': 12060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:46.183355', 'step': 12060, 'epoch': 2} {'type': 'loss', 'content': 0.15837478637695312, 'timestamp': '2025-09-10 02:49:46.185295', 'step': 12061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:46.243299', 'step': 12061, 'epoch': 2} {'type': 'loss', 'content': 0.08188025653362274, 'timestamp': '2025-09-10 02:49:46.246573', 'step': 12062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:46.301831', 'step': 12062, 'epoch': 2} {'type': 'loss', 'content': 0.1600257009267807, 'timestamp': '2025-09-10 02:49:46.303983', 'step': 12063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:46.358357', 'step': 12063, 'epoch': 2} {'type': 'loss', 'content': 0.10573671758174896, 'timestamp': '2025-09-10 02:49:46.364902', 'step': 12064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:46.419527', 'step': 12064, 'epoch': 2} {'type': 'loss', 'content': 0.1161634773015976, 'timestamp': '2025-09-10 02:49:46.421797', 'step': 12065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:46.477852', 'step': 12065, 'epoch': 2} {'type': 'loss', 'content': 0.10666824877262115, 'timestamp': '2025-09-10 02:49:46.480205', 'step': 12066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:46.536122', 'step': 12066, 'epoch': 2} {'type': 'loss', 'content': 0.07705987244844437, 'timestamp': '2025-09-10 02:49:46.538365', 'step': 12067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:46.596865', 'step': 12067, 'epoch': 2} {'type': 'loss', 'content': 0.10851050913333893, 'timestamp': '2025-09-10 02:49:46.603140', 'step': 12068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:46.658374', 'step': 12068, 'epoch': 2} {'type': 'loss', 'content': 0.09597457200288773, 'timestamp': '2025-09-10 02:49:46.660609', 'step': 12069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:46.716441', 'step': 12069, 'epoch': 2} {'type': 'loss', 'content': 0.10958036780357361, 'timestamp': '2025-09-10 02:49:46.718439', 'step': 12070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:46.773880', 'step': 12070, 'epoch': 2} {'type': 'loss', 'content': 0.16320723295211792, 'timestamp': '2025-09-10 02:49:46.777542', 'step': 12071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:46.842335', 'step': 12071, 'epoch': 2} {'type': 'loss', 'content': 0.09269673377275467, 'timestamp': '2025-09-10 02:49:46.848532', 'step': 12072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:46.916952', 'step': 12072, 'epoch': 2} {'type': 'loss', 'content': 0.25770294666290283, 'timestamp': '2025-09-10 02:49:46.918948', 'step': 12073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:46.974988', 'step': 12073, 'epoch': 2} {'type': 'loss', 'content': 0.09652355313301086, 'timestamp': '2025-09-10 02:49:46.976917', 'step': 12074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:47.034940', 'step': 12074, 'epoch': 2} {'type': 'loss', 'content': 0.0933653712272644, 'timestamp': '2025-09-10 02:49:47.038982', 'step': 12075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:47.099591', 'step': 12075, 'epoch': 2} {'type': 'loss', 'content': 0.09614496678113937, 'timestamp': '2025-09-10 02:49:47.105941', 'step': 12076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:47.160738', 'step': 12076, 'epoch': 2} {'type': 'loss', 'content': 0.07026339322328568, 'timestamp': '2025-09-10 02:49:47.163024', 'step': 12077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:47.218062', 'step': 12077, 'epoch': 2} {'type': 'loss', 'content': 0.06893154233694077, 'timestamp': '2025-09-10 02:49:47.220415', 'step': 12078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:47.275658', 'step': 12078, 'epoch': 2} {'type': 'loss', 'content': 0.10628300905227661, 'timestamp': '2025-09-10 02:49:47.278765', 'step': 12079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:47.333250', 'step': 12079, 'epoch': 2} {'type': 'loss', 'content': 0.24739474058151245, 'timestamp': '2025-09-10 02:49:47.339510', 'step': 12080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:47.394135', 'step': 12080, 'epoch': 2} {'type': 'loss', 'content': 0.11874768137931824, 'timestamp': '2025-09-10 02:49:47.396455', 'step': 12081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:47.451204', 'step': 12081, 'epoch': 2} {'type': 'loss', 'content': 0.12329147011041641, 'timestamp': '2025-09-10 02:49:47.453184', 'step': 12082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:47.507973', 'step': 12082, 'epoch': 2} {'type': 'loss', 'content': 0.05809986963868141, 'timestamp': '2025-09-10 02:49:47.510739', 'step': 12083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:47.568102', 'step': 12083, 'epoch': 2} {'type': 'loss', 'content': 0.10221290588378906, 'timestamp': '2025-09-10 02:49:47.574578', 'step': 12084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:47.631011', 'step': 12084, 'epoch': 2} {'type': 'loss', 'content': 0.2982123792171478, 'timestamp': '2025-09-10 02:49:47.633224', 'step': 12085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:47.687827', 'step': 12085, 'epoch': 2} {'type': 'loss', 'content': 0.16117233037948608, 'timestamp': '2025-09-10 02:49:47.690122', 'step': 12086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:49:47.745306', 'step': 12086, 'epoch': 2} {'type': 'loss', 'content': 0.11089392751455307, 'timestamp': '2025-09-10 02:49:47.747609', 'step': 12087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:47.802303', 'step': 12087, 'epoch': 2} {'type': 'loss', 'content': 0.06074492260813713, 'timestamp': '2025-09-10 02:49:47.808599', 'step': 12088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:47.864214', 'step': 12088, 'epoch': 2} {'type': 'loss', 'content': 0.16346758604049683, 'timestamp': '2025-09-10 02:49:47.866434', 'step': 12089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:47.921270', 'step': 12089, 'epoch': 2} {'type': 'loss', 'content': 0.14046478271484375, 'timestamp': '2025-09-10 02:49:47.923558', 'step': 12090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:47.978738', 'step': 12090, 'epoch': 2} {'type': 'loss', 'content': 0.07933786511421204, 'timestamp': '2025-09-10 02:49:47.980910', 'step': 12091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:48.038969', 'step': 12091, 'epoch': 2} {'type': 'loss', 'content': 0.10955862700939178, 'timestamp': '2025-09-10 02:49:48.047792', 'step': 12092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:48.103408', 'step': 12092, 'epoch': 2} {'type': 'loss', 'content': 0.1515340507030487, 'timestamp': '2025-09-10 02:49:48.105743', 'step': 12093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:48.161119', 'step': 12093, 'epoch': 2} {'type': 'loss', 'content': 0.11256526410579681, 'timestamp': '2025-09-10 02:49:48.163349', 'step': 12094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:48.220360', 'step': 12094, 'epoch': 2} {'type': 'loss', 'content': 0.09692346304655075, 'timestamp': '2025-09-10 02:49:48.222694', 'step': 12095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:48.277998', 'step': 12095, 'epoch': 2} {'type': 'loss', 'content': 0.07698807120323181, 'timestamp': '2025-09-10 02:49:48.284293', 'step': 12096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:48.341339', 'step': 12096, 'epoch': 2} {'type': 'loss', 'content': 0.1620945781469345, 'timestamp': '2025-09-10 02:49:48.343357', 'step': 12097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:48.398211', 'step': 12097, 'epoch': 2} {'type': 'loss', 'content': 0.15062449872493744, 'timestamp': '2025-09-10 02:49:48.400420', 'step': 12098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:48.455620', 'step': 12098, 'epoch': 2} {'type': 'loss', 'content': 0.21228694915771484, 'timestamp': '2025-09-10 02:49:48.457672', 'step': 12099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:48.513674', 'step': 12099, 'epoch': 2} {'type': 'loss', 'content': 0.17441494762897491, 'timestamp': '2025-09-10 02:49:48.519955', 'step': 12100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:48.573529', 'step': 12100, 'epoch': 2} {'type': 'loss', 'content': 0.1256633698940277, 'timestamp': '2025-09-10 02:49:48.575579', 'step': 12101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:48.630702', 'step': 12101, 'epoch': 2} {'type': 'loss', 'content': 0.1005910187959671, 'timestamp': '2025-09-10 02:49:48.632928', 'step': 12102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:48.686894', 'step': 12102, 'epoch': 2} {'type': 'loss', 'content': 0.05159521475434303, 'timestamp': '2025-09-10 02:49:48.689051', 'step': 12103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:48.745541', 'step': 12103, 'epoch': 2} {'type': 'loss', 'content': 0.16157636046409607, 'timestamp': '2025-09-10 02:49:48.751808', 'step': 12104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:48.806380', 'step': 12104, 'epoch': 2} {'type': 'loss', 'content': 0.1452617645263672, 'timestamp': '2025-09-10 02:49:48.808686', 'step': 12105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:48.864477', 'step': 12105, 'epoch': 2} {'type': 'loss', 'content': 0.09778913855552673, 'timestamp': '2025-09-10 02:49:48.866716', 'step': 12106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:48.921713', 'step': 12106, 'epoch': 2} {'type': 'loss', 'content': 0.15001428127288818, 'timestamp': '2025-09-10 02:49:48.924122', 'step': 12107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:48.978771', 'step': 12107, 'epoch': 2} {'type': 'loss', 'content': 0.09875118732452393, 'timestamp': '2025-09-10 02:49:48.985259', 'step': 12108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:49.038960', 'step': 12108, 'epoch': 2} {'type': 'loss', 'content': 0.10657450556755066, 'timestamp': '2025-09-10 02:49:49.041140', 'step': 12109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:49.095459', 'step': 12109, 'epoch': 2} {'type': 'loss', 'content': 0.12649239599704742, 'timestamp': '2025-09-10 02:49:49.097671', 'step': 12110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:49.152767', 'step': 12110, 'epoch': 2} {'type': 'loss', 'content': 0.23357272148132324, 'timestamp': '2025-09-10 02:49:49.154727', 'step': 12111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:49:49.209368', 'step': 12111, 'epoch': 2} {'type': 'loss', 'content': 0.05750761181116104, 'timestamp': '2025-09-10 02:49:49.215754', 'step': 12112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:49.270191', 'step': 12112, 'epoch': 2} {'type': 'loss', 'content': 0.10967464745044708, 'timestamp': '2025-09-10 02:49:49.272486', 'step': 12113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:49:49.327027', 'step': 12113, 'epoch': 2} {'type': 'loss', 'content': 0.21015426516532898, 'timestamp': '2025-09-10 02:49:49.329198', 'step': 12114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:49.383560', 'step': 12114, 'epoch': 2} {'type': 'loss', 'content': 0.1389816850423813, 'timestamp': '2025-09-10 02:49:49.385715', 'step': 12115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:49.439430', 'step': 12115, 'epoch': 2} {'type': 'loss', 'content': 0.07231427729129791, 'timestamp': '2025-09-10 02:49:49.445619', 'step': 12116, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:50:02.044047', 'step': 12116, 'epoch': 2} {'type': 'pplx', 'content': 13562.729494076091, 'timestamp': '2025-09-10 02:50:02.047076', 'step': 12116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:02.101191', 'step': 12116, 'epoch': 2} {'type': 'loss', 'content': 0.1011139526963234, 'timestamp': '2025-09-10 02:50:02.103213', 'step': 12117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:02.157590', 'step': 12117, 'epoch': 2} {'type': 'loss', 'content': 0.17366832494735718, 'timestamp': '2025-09-10 02:50:02.159660', 'step': 12118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:02.212845', 'step': 12118, 'epoch': 2} {'type': 'loss', 'content': 0.10585708171129227, 'timestamp': '2025-09-10 02:50:02.214984', 'step': 12119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:02.268283', 'step': 12119, 'epoch': 2} {'type': 'loss', 'content': 0.175114244222641, 'timestamp': '2025-09-10 02:50:02.274977', 'step': 12120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:02.327552', 'step': 12120, 'epoch': 2} {'type': 'loss', 'content': 0.08297494798898697, 'timestamp': '2025-09-10 02:50:02.329797', 'step': 12121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:02.382681', 'step': 12121, 'epoch': 2} {'type': 'loss', 'content': 0.12589995563030243, 'timestamp': '2025-09-10 02:50:02.384873', 'step': 12122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:02.437776', 'step': 12122, 'epoch': 2} {'type': 'loss', 'content': 0.12866321206092834, 'timestamp': '2025-09-10 02:50:02.439927', 'step': 12123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:02.492710', 'step': 12123, 'epoch': 2} {'type': 'loss', 'content': 0.1006971001625061, 'timestamp': '2025-09-10 02:50:02.498663', 'step': 12124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:02.557095', 'step': 12124, 'epoch': 2} {'type': 'loss', 'content': 0.0793851688504219, 'timestamp': '2025-09-10 02:50:02.559316', 'step': 12125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:02.613194', 'step': 12125, 'epoch': 2} {'type': 'loss', 'content': 0.14788809418678284, 'timestamp': '2025-09-10 02:50:02.615134', 'step': 12126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:02.668533', 'step': 12126, 'epoch': 2} {'type': 'loss', 'content': 0.08882709592580795, 'timestamp': '2025-09-10 02:50:02.670573', 'step': 12127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:02.724636', 'step': 12127, 'epoch': 2} {'type': 'loss', 'content': 0.14398176968097687, 'timestamp': '2025-09-10 02:50:02.730618', 'step': 12128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:02.783270', 'step': 12128, 'epoch': 2} {'type': 'loss', 'content': 0.10101014375686646, 'timestamp': '2025-09-10 02:50:02.785378', 'step': 12129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:02.838981', 'step': 12129, 'epoch': 2} {'type': 'loss', 'content': 0.16938188672065735, 'timestamp': '2025-09-10 02:50:02.841187', 'step': 12130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:02.895098', 'step': 12130, 'epoch': 2} {'type': 'loss', 'content': 0.161476269364357, 'timestamp': '2025-09-10 02:50:02.897244', 'step': 12131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:02.950620', 'step': 12131, 'epoch': 2} {'type': 'loss', 'content': 0.2098844349384308, 'timestamp': '2025-09-10 02:50:02.956239', 'step': 12132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:03.009324', 'step': 12132, 'epoch': 2} {'type': 'loss', 'content': 0.13018353283405304, 'timestamp': '2025-09-10 02:50:03.011199', 'step': 12133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:03.064453', 'step': 12133, 'epoch': 2} {'type': 'loss', 'content': 0.12299502640962601, 'timestamp': '2025-09-10 02:50:03.066612', 'step': 12134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:03.121724', 'step': 12134, 'epoch': 2} {'type': 'loss', 'content': 0.14874807000160217, 'timestamp': '2025-09-10 02:50:03.123885', 'step': 12135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:03.178286', 'step': 12135, 'epoch': 2} {'type': 'loss', 'content': 0.14304448664188385, 'timestamp': '2025-09-10 02:50:03.184117', 'step': 12136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:03.240917', 'step': 12136, 'epoch': 2} {'type': 'loss', 'content': 0.17595410346984863, 'timestamp': '2025-09-10 02:50:03.242871', 'step': 12137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:03.295914', 'step': 12137, 'epoch': 2} {'type': 'loss', 'content': 0.11184854805469513, 'timestamp': '2025-09-10 02:50:03.297962', 'step': 12138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:03.351107', 'step': 12138, 'epoch': 2} {'type': 'loss', 'content': 0.18028010427951813, 'timestamp': '2025-09-10 02:50:03.353209', 'step': 12139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:03.406645', 'step': 12139, 'epoch': 2} {'type': 'loss', 'content': 0.11615163087844849, 'timestamp': '2025-09-10 02:50:03.416194', 'step': 12140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:03.470645', 'step': 12140, 'epoch': 2} {'type': 'loss', 'content': 0.09756314754486084, 'timestamp': '2025-09-10 02:50:03.473197', 'step': 12141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:03.529344', 'step': 12141, 'epoch': 2} {'type': 'loss', 'content': 0.13019870221614838, 'timestamp': '2025-09-10 02:50:03.531357', 'step': 12142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:03.585136', 'step': 12142, 'epoch': 2} {'type': 'loss', 'content': 0.05989363417029381, 'timestamp': '2025-09-10 02:50:03.587569', 'step': 12143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:03.648535', 'step': 12143, 'epoch': 2} {'type': 'loss', 'content': 0.14551429450511932, 'timestamp': '2025-09-10 02:50:03.654640', 'step': 12144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:03.707768', 'step': 12144, 'epoch': 2} {'type': 'loss', 'content': 0.1475357860326767, 'timestamp': '2025-09-10 02:50:03.709939', 'step': 12145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:03.763543', 'step': 12145, 'epoch': 2} {'type': 'loss', 'content': 0.13913753628730774, 'timestamp': '2025-09-10 02:50:03.768041', 'step': 12146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:03.822825', 'step': 12146, 'epoch': 2} {'type': 'loss', 'content': 0.14502482116222382, 'timestamp': '2025-09-10 02:50:03.824771', 'step': 12147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:03.877850', 'step': 12147, 'epoch': 2} {'type': 'loss', 'content': 0.12107465416193008, 'timestamp': '2025-09-10 02:50:03.883728', 'step': 12148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:03.937927', 'step': 12148, 'epoch': 2} {'type': 'loss', 'content': 0.03771097585558891, 'timestamp': '2025-09-10 02:50:03.940050', 'step': 12149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:03.998162', 'step': 12149, 'epoch': 2} {'type': 'loss', 'content': 0.08139777928590775, 'timestamp': '2025-09-10 02:50:04.000312', 'step': 12150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:04.057980', 'step': 12150, 'epoch': 2} {'type': 'loss', 'content': 0.1592976152896881, 'timestamp': '2025-09-10 02:50:04.059916', 'step': 12151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:04.113382', 'step': 12151, 'epoch': 2} {'type': 'loss', 'content': 0.08250303566455841, 'timestamp': '2025-09-10 02:50:04.119603', 'step': 12152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:04.172908', 'step': 12152, 'epoch': 2} {'type': 'loss', 'content': 0.0836050733923912, 'timestamp': '2025-09-10 02:50:04.175048', 'step': 12153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:04.228306', 'step': 12153, 'epoch': 2} {'type': 'loss', 'content': 0.12722358107566833, 'timestamp': '2025-09-10 02:50:04.230293', 'step': 12154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:04.283771', 'step': 12154, 'epoch': 2} {'type': 'loss', 'content': 0.11178886890411377, 'timestamp': '2025-09-10 02:50:04.285781', 'step': 12155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:04.340386', 'step': 12155, 'epoch': 2} {'type': 'loss', 'content': 0.10549944639205933, 'timestamp': '2025-09-10 02:50:04.346258', 'step': 12156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:04.400177', 'step': 12156, 'epoch': 2} {'type': 'loss', 'content': 0.07433681190013885, 'timestamp': '2025-09-10 02:50:04.402453', 'step': 12157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:04.456633', 'step': 12157, 'epoch': 2} {'type': 'loss', 'content': 0.17765311896800995, 'timestamp': '2025-09-10 02:50:04.458915', 'step': 12158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:04.512918', 'step': 12158, 'epoch': 2} {'type': 'loss', 'content': 0.10653910785913467, 'timestamp': '2025-09-10 02:50:04.515082', 'step': 12159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:04.569079', 'step': 12159, 'epoch': 2} {'type': 'loss', 'content': 0.2591400444507599, 'timestamp': '2025-09-10 02:50:04.575198', 'step': 12160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:04.628412', 'step': 12160, 'epoch': 2} {'type': 'loss', 'content': 0.1243903636932373, 'timestamp': '2025-09-10 02:50:04.630480', 'step': 12161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:04.686423', 'step': 12161, 'epoch': 2} {'type': 'loss', 'content': 0.07389312237501144, 'timestamp': '2025-09-10 02:50:04.688483', 'step': 12162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:04.744168', 'step': 12162, 'epoch': 2} {'type': 'loss', 'content': 0.06850164383649826, 'timestamp': '2025-09-10 02:50:04.746157', 'step': 12163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:04.801442', 'step': 12163, 'epoch': 2} {'type': 'loss', 'content': 0.14846812188625336, 'timestamp': '2025-09-10 02:50:04.807347', 'step': 12164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:04.861207', 'step': 12164, 'epoch': 2} {'type': 'loss', 'content': 0.0830826535820961, 'timestamp': '2025-09-10 02:50:04.863185', 'step': 12165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:04.917214', 'step': 12165, 'epoch': 2} {'type': 'loss', 'content': 0.09492999315261841, 'timestamp': '2025-09-10 02:50:04.919520', 'step': 12166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:04.973091', 'step': 12166, 'epoch': 2} {'type': 'loss', 'content': 0.10925479233264923, 'timestamp': '2025-09-10 02:50:04.975463', 'step': 12167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:05.029075', 'step': 12167, 'epoch': 2} {'type': 'loss', 'content': 0.1381620317697525, 'timestamp': '2025-09-10 02:50:05.034846', 'step': 12168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:05.088179', 'step': 12168, 'epoch': 2} {'type': 'loss', 'content': 0.07408218085765839, 'timestamp': '2025-09-10 02:50:05.090144', 'step': 12169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:05.144960', 'step': 12169, 'epoch': 2} {'type': 'loss', 'content': 0.08556810021400452, 'timestamp': '2025-09-10 02:50:05.147129', 'step': 12170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:05.201798', 'step': 12170, 'epoch': 2} {'type': 'loss', 'content': 0.14053085446357727, 'timestamp': '2025-09-10 02:50:05.203853', 'step': 12171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:05.258348', 'step': 12171, 'epoch': 2} {'type': 'loss', 'content': 0.12677185237407684, 'timestamp': '2025-09-10 02:50:05.264341', 'step': 12172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:05.317971', 'step': 12172, 'epoch': 2} {'type': 'loss', 'content': 0.1707584708929062, 'timestamp': '2025-09-10 02:50:05.319835', 'step': 12173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:05.372833', 'step': 12173, 'epoch': 2} {'type': 'loss', 'content': 0.3380773663520813, 'timestamp': '2025-09-10 02:50:05.375051', 'step': 12174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:05.428666', 'step': 12174, 'epoch': 2} {'type': 'loss', 'content': 0.19169281423091888, 'timestamp': '2025-09-10 02:50:05.430723', 'step': 12175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:05.483516', 'step': 12175, 'epoch': 2} {'type': 'loss', 'content': 0.17001813650131226, 'timestamp': '2025-09-10 02:50:05.489396', 'step': 12176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:05.541864', 'step': 12176, 'epoch': 2} {'type': 'loss', 'content': 0.1574738323688507, 'timestamp': '2025-09-10 02:50:05.543944', 'step': 12177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:05.596788', 'step': 12177, 'epoch': 2} {'type': 'loss', 'content': 0.11082348972558975, 'timestamp': '2025-09-10 02:50:05.598724', 'step': 12178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:05.652810', 'step': 12178, 'epoch': 2} {'type': 'loss', 'content': 0.1322297751903534, 'timestamp': '2025-09-10 02:50:05.654879', 'step': 12179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:05.710548', 'step': 12179, 'epoch': 2} {'type': 'loss', 'content': 0.06478144973516464, 'timestamp': '2025-09-10 02:50:05.716591', 'step': 12180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:05.774488', 'step': 12180, 'epoch': 2} {'type': 'loss', 'content': 0.2023647129535675, 'timestamp': '2025-09-10 02:50:05.776485', 'step': 12181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:05.829676', 'step': 12181, 'epoch': 2} {'type': 'loss', 'content': 0.09381549805402756, 'timestamp': '2025-09-10 02:50:05.831745', 'step': 12182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:05.886944', 'step': 12182, 'epoch': 2} {'type': 'loss', 'content': 0.08372335880994797, 'timestamp': '2025-09-10 02:50:05.892648', 'step': 12183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:05.953782', 'step': 12183, 'epoch': 2} {'type': 'loss', 'content': 0.07662463188171387, 'timestamp': '2025-09-10 02:50:05.959643', 'step': 12184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:06.012874', 'step': 12184, 'epoch': 2} {'type': 'loss', 'content': 0.08761599659919739, 'timestamp': '2025-09-10 02:50:06.014903', 'step': 12185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:06.068353', 'step': 12185, 'epoch': 2} {'type': 'loss', 'content': 0.27527445554733276, 'timestamp': '2025-09-10 02:50:06.070439', 'step': 12186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:06.127288', 'step': 12186, 'epoch': 2} {'type': 'loss', 'content': 0.19005630910396576, 'timestamp': '2025-09-10 02:50:06.129477', 'step': 12187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:06.182161', 'step': 12187, 'epoch': 2} {'type': 'loss', 'content': 0.1808135062456131, 'timestamp': '2025-09-10 02:50:06.188315', 'step': 12188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:06.241883', 'step': 12188, 'epoch': 2} {'type': 'loss', 'content': 0.05523141473531723, 'timestamp': '2025-09-10 02:50:06.243850', 'step': 12189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:06.298050', 'step': 12189, 'epoch': 2} {'type': 'loss', 'content': 0.07426346093416214, 'timestamp': '2025-09-10 02:50:06.304996', 'step': 12190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:06.363112', 'step': 12190, 'epoch': 2} {'type': 'loss', 'content': 0.13549718260765076, 'timestamp': '2025-09-10 02:50:06.365173', 'step': 12191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:06.419867', 'step': 12191, 'epoch': 2} {'type': 'loss', 'content': 0.12378855049610138, 'timestamp': '2025-09-10 02:50:06.425833', 'step': 12192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:06.479453', 'step': 12192, 'epoch': 2} {'type': 'loss', 'content': 0.12773339450359344, 'timestamp': '2025-09-10 02:50:06.481441', 'step': 12193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:06.535737', 'step': 12193, 'epoch': 2} {'type': 'loss', 'content': 0.09991051256656647, 'timestamp': '2025-09-10 02:50:06.537813', 'step': 12194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:06.591317', 'step': 12194, 'epoch': 2} {'type': 'loss', 'content': 0.2141004055738449, 'timestamp': '2025-09-10 02:50:06.593294', 'step': 12195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:06.646915', 'step': 12195, 'epoch': 2} {'type': 'loss', 'content': 0.12867364287376404, 'timestamp': '2025-09-10 02:50:06.652738', 'step': 12196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:06.705832', 'step': 12196, 'epoch': 2} {'type': 'loss', 'content': 0.07610035687685013, 'timestamp': '2025-09-10 02:50:06.707920', 'step': 12197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:06.762122', 'step': 12197, 'epoch': 2} {'type': 'loss', 'content': 0.12990842759609222, 'timestamp': '2025-09-10 02:50:06.764156', 'step': 12198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:06.818735', 'step': 12198, 'epoch': 2} {'type': 'loss', 'content': 0.09662572294473648, 'timestamp': '2025-09-10 02:50:06.820808', 'step': 12199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:06.875179', 'step': 12199, 'epoch': 2} {'type': 'loss', 'content': 0.10197830945253372, 'timestamp': '2025-09-10 02:50:06.881167', 'step': 12200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:06.933928', 'step': 12200, 'epoch': 2} {'type': 'loss', 'content': 0.17719116806983948, 'timestamp': '2025-09-10 02:50:06.936797', 'step': 12201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:06.990683', 'step': 12201, 'epoch': 2} {'type': 'loss', 'content': 0.07636239379644394, 'timestamp': '2025-09-10 02:50:06.992968', 'step': 12202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:07.051820', 'step': 12202, 'epoch': 2} {'type': 'loss', 'content': 0.10819631814956665, 'timestamp': '2025-09-10 02:50:07.054578', 'step': 12203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:07.107763', 'step': 12203, 'epoch': 2} {'type': 'loss', 'content': 0.04459797963500023, 'timestamp': '2025-09-10 02:50:07.113790', 'step': 12204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:07.167170', 'step': 12204, 'epoch': 2} {'type': 'loss', 'content': 0.08105959743261337, 'timestamp': '2025-09-10 02:50:07.169144', 'step': 12205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:07.223707', 'step': 12205, 'epoch': 2} {'type': 'loss', 'content': 0.22663334012031555, 'timestamp': '2025-09-10 02:50:07.226297', 'step': 12206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:07.281402', 'step': 12206, 'epoch': 2} {'type': 'loss', 'content': 0.15798015892505646, 'timestamp': '2025-09-10 02:50:07.286872', 'step': 12207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:07.340527', 'step': 12207, 'epoch': 2} {'type': 'loss', 'content': 0.06758517771959305, 'timestamp': '2025-09-10 02:50:07.346443', 'step': 12208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:07.399434', 'step': 12208, 'epoch': 2} {'type': 'loss', 'content': 0.14844045042991638, 'timestamp': '2025-09-10 02:50:07.401506', 'step': 12209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:07.454949', 'step': 12209, 'epoch': 2} {'type': 'loss', 'content': 0.0517309233546257, 'timestamp': '2025-09-10 02:50:07.458192', 'step': 12210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:07.513530', 'step': 12210, 'epoch': 2} {'type': 'loss', 'content': 0.16681738197803497, 'timestamp': '2025-09-10 02:50:07.515584', 'step': 12211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:07.568645', 'step': 12211, 'epoch': 2} {'type': 'loss', 'content': 0.15339036285877228, 'timestamp': '2025-09-10 02:50:07.574502', 'step': 12212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:07.629683', 'step': 12212, 'epoch': 2} {'type': 'loss', 'content': 0.19200479984283447, 'timestamp': '2025-09-10 02:50:07.631631', 'step': 12213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:07.688624', 'step': 12213, 'epoch': 2} {'type': 'loss', 'content': 0.16354098916053772, 'timestamp': '2025-09-10 02:50:07.690714', 'step': 12214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:07.744148', 'step': 12214, 'epoch': 2} {'type': 'loss', 'content': 0.13950592279434204, 'timestamp': '2025-09-10 02:50:07.750029', 'step': 12215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:07.806399', 'step': 12215, 'epoch': 2} {'type': 'loss', 'content': 0.16660428047180176, 'timestamp': '2025-09-10 02:50:07.812312', 'step': 12216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:07.866656', 'step': 12216, 'epoch': 2} {'type': 'loss', 'content': 0.14372903108596802, 'timestamp': '2025-09-10 02:50:07.868814', 'step': 12217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:07.922454', 'step': 12217, 'epoch': 2} {'type': 'loss', 'content': 0.09907493740320206, 'timestamp': '2025-09-10 02:50:07.924485', 'step': 12218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:07.977881', 'step': 12218, 'epoch': 2} {'type': 'loss', 'content': 0.13511250913143158, 'timestamp': '2025-09-10 02:50:07.979903', 'step': 12219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:08.034362', 'step': 12219, 'epoch': 2} {'type': 'loss', 'content': 0.08143904060125351, 'timestamp': '2025-09-10 02:50:08.040205', 'step': 12220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:08.095724', 'step': 12220, 'epoch': 2} {'type': 'loss', 'content': 0.1587570309638977, 'timestamp': '2025-09-10 02:50:08.098205', 'step': 12221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:08.154165', 'step': 12221, 'epoch': 2} {'type': 'loss', 'content': 0.0588345043361187, 'timestamp': '2025-09-10 02:50:08.156133', 'step': 12222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:08.211837', 'step': 12222, 'epoch': 2} {'type': 'loss', 'content': 0.04563559591770172, 'timestamp': '2025-09-10 02:50:08.213866', 'step': 12223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:08.268394', 'step': 12223, 'epoch': 2} {'type': 'loss', 'content': 0.10987312346696854, 'timestamp': '2025-09-10 02:50:08.274313', 'step': 12224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:08.326971', 'step': 12224, 'epoch': 2} {'type': 'loss', 'content': 0.08701366186141968, 'timestamp': '2025-09-10 02:50:08.328962', 'step': 12225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:08.382127', 'step': 12225, 'epoch': 2} {'type': 'loss', 'content': 0.28227418661117554, 'timestamp': '2025-09-10 02:50:08.384132', 'step': 12226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:08.445295', 'step': 12226, 'epoch': 2} {'type': 'loss', 'content': 0.1618081033229828, 'timestamp': '2025-09-10 02:50:08.447289', 'step': 12227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:08.501291', 'step': 12227, 'epoch': 2} {'type': 'loss', 'content': 0.19033598899841309, 'timestamp': '2025-09-10 02:50:08.507392', 'step': 12228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:08.561740', 'step': 12228, 'epoch': 2} {'type': 'loss', 'content': 0.10949880629777908, 'timestamp': '2025-09-10 02:50:08.563748', 'step': 12229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:08.617742', 'step': 12229, 'epoch': 2} {'type': 'loss', 'content': 0.16265083849430084, 'timestamp': '2025-09-10 02:50:08.619852', 'step': 12230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:08.674642', 'step': 12230, 'epoch': 2} {'type': 'loss', 'content': 0.08145067095756531, 'timestamp': '2025-09-10 02:50:08.676767', 'step': 12231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:08.729895', 'step': 12231, 'epoch': 2} {'type': 'loss', 'content': 0.06730136275291443, 'timestamp': '2025-09-10 02:50:08.735722', 'step': 12232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:08.788657', 'step': 12232, 'epoch': 2} {'type': 'loss', 'content': 0.1499350666999817, 'timestamp': '2025-09-10 02:50:08.790789', 'step': 12233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:08.843518', 'step': 12233, 'epoch': 2} {'type': 'loss', 'content': 0.16049984097480774, 'timestamp': '2025-09-10 02:50:08.845635', 'step': 12234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:08.899959', 'step': 12234, 'epoch': 2} {'type': 'loss', 'content': 0.1644199937582016, 'timestamp': '2025-09-10 02:50:08.901935', 'step': 12235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:08.957777', 'step': 12235, 'epoch': 2} {'type': 'loss', 'content': 0.07130230963230133, 'timestamp': '2025-09-10 02:50:08.963860', 'step': 12236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:09.017401', 'step': 12236, 'epoch': 2} {'type': 'loss', 'content': 0.0854443833231926, 'timestamp': '2025-09-10 02:50:09.019430', 'step': 12237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:09.073633', 'step': 12237, 'epoch': 2} {'type': 'loss', 'content': 0.0810399204492569, 'timestamp': '2025-09-10 02:50:09.075863', 'step': 12238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:09.130484', 'step': 12238, 'epoch': 2} {'type': 'loss', 'content': 0.11593551188707352, 'timestamp': '2025-09-10 02:50:09.132639', 'step': 12239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:09.187143', 'step': 12239, 'epoch': 2} {'type': 'loss', 'content': 0.10848873853683472, 'timestamp': '2025-09-10 02:50:09.193092', 'step': 12240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:09.247248', 'step': 12240, 'epoch': 2} {'type': 'loss', 'content': 0.21974149346351624, 'timestamp': '2025-09-10 02:50:09.249449', 'step': 12241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:09.305736', 'step': 12241, 'epoch': 2} {'type': 'loss', 'content': 0.20783206820487976, 'timestamp': '2025-09-10 02:50:09.308063', 'step': 12242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:09.365631', 'step': 12242, 'epoch': 2} {'type': 'loss', 'content': 0.08762046694755554, 'timestamp': '2025-09-10 02:50:09.368044', 'step': 12243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:09.423695', 'step': 12243, 'epoch': 2} {'type': 'loss', 'content': 0.1974422037601471, 'timestamp': '2025-09-10 02:50:09.430141', 'step': 12244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:09.485094', 'step': 12244, 'epoch': 2} {'type': 'loss', 'content': 0.08642150461673737, 'timestamp': '2025-09-10 02:50:09.487234', 'step': 12245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:09.541096', 'step': 12245, 'epoch': 2} {'type': 'loss', 'content': 0.12218336760997772, 'timestamp': '2025-09-10 02:50:09.543442', 'step': 12246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:09.597673', 'step': 12246, 'epoch': 2} {'type': 'loss', 'content': 0.09183206409215927, 'timestamp': '2025-09-10 02:50:09.599873', 'step': 12247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:09.654365', 'step': 12247, 'epoch': 2} {'type': 'loss', 'content': 0.0790783241391182, 'timestamp': '2025-09-10 02:50:09.660611', 'step': 12248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:09.715736', 'step': 12248, 'epoch': 2} {'type': 'loss', 'content': 0.12332703173160553, 'timestamp': '2025-09-10 02:50:09.717951', 'step': 12249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:09.775633', 'step': 12249, 'epoch': 2} {'type': 'loss', 'content': 0.1301763504743576, 'timestamp': '2025-09-10 02:50:09.777811', 'step': 12250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:09.832157', 'step': 12250, 'epoch': 2} {'type': 'loss', 'content': 0.08522013574838638, 'timestamp': '2025-09-10 02:50:09.834491', 'step': 12251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:09.889089', 'step': 12251, 'epoch': 2} {'type': 'loss', 'content': 0.21802224218845367, 'timestamp': '2025-09-10 02:50:09.895298', 'step': 12252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:09.949356', 'step': 12252, 'epoch': 2} {'type': 'loss', 'content': 0.20318986475467682, 'timestamp': '2025-09-10 02:50:09.952160', 'step': 12253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:10.006144', 'step': 12253, 'epoch': 2} {'type': 'loss', 'content': 0.12217765301465988, 'timestamp': '2025-09-10 02:50:10.008245', 'step': 12254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:10.061990', 'step': 12254, 'epoch': 2} {'type': 'loss', 'content': 0.1195126473903656, 'timestamp': '2025-09-10 02:50:10.064073', 'step': 12255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:10.117726', 'step': 12255, 'epoch': 2} {'type': 'loss', 'content': 0.07318585366010666, 'timestamp': '2025-09-10 02:50:10.123819', 'step': 12256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:10.176968', 'step': 12256, 'epoch': 2} {'type': 'loss', 'content': 0.12546628713607788, 'timestamp': '2025-09-10 02:50:10.179210', 'step': 12257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:10.234254', 'step': 12257, 'epoch': 2} {'type': 'loss', 'content': 0.13603128492832184, 'timestamp': '2025-09-10 02:50:10.236451', 'step': 12258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:10.293597', 'step': 12258, 'epoch': 2} {'type': 'loss', 'content': 0.09382393956184387, 'timestamp': '2025-09-10 02:50:10.297254', 'step': 12259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:10.352339', 'step': 12259, 'epoch': 2} {'type': 'loss', 'content': 0.0912674069404602, 'timestamp': '2025-09-10 02:50:10.358876', 'step': 12260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:10.414052', 'step': 12260, 'epoch': 2} {'type': 'loss', 'content': 0.10521894693374634, 'timestamp': '2025-09-10 02:50:10.416492', 'step': 12261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:10.472663', 'step': 12261, 'epoch': 2} {'type': 'loss', 'content': 0.1274184286594391, 'timestamp': '2025-09-10 02:50:10.474953', 'step': 12262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:10.537390', 'step': 12262, 'epoch': 2} {'type': 'loss', 'content': 0.14067059755325317, 'timestamp': '2025-09-10 02:50:10.539714', 'step': 12263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:10.600615', 'step': 12263, 'epoch': 2} {'type': 'loss', 'content': 0.1856253743171692, 'timestamp': '2025-09-10 02:50:10.606714', 'step': 12264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:10.662750', 'step': 12264, 'epoch': 2} {'type': 'loss', 'content': 0.26162880659103394, 'timestamp': '2025-09-10 02:50:10.664992', 'step': 12265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:10.723997', 'step': 12265, 'epoch': 2} {'type': 'loss', 'content': 0.11935247480869293, 'timestamp': '2025-09-10 02:50:10.726300', 'step': 12266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:10.783239', 'step': 12266, 'epoch': 2} {'type': 'loss', 'content': 0.06982909888029099, 'timestamp': '2025-09-10 02:50:10.785466', 'step': 12267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:10.841345', 'step': 12267, 'epoch': 2} {'type': 'loss', 'content': 0.15457989275455475, 'timestamp': '2025-09-10 02:50:10.847578', 'step': 12268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:10.900670', 'step': 12268, 'epoch': 2} {'type': 'loss', 'content': 0.09965392202138901, 'timestamp': '2025-09-10 02:50:10.902860', 'step': 12269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:10.956856', 'step': 12269, 'epoch': 2} {'type': 'loss', 'content': 0.17269766330718994, 'timestamp': '2025-09-10 02:50:10.959047', 'step': 12270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:11.014026', 'step': 12270, 'epoch': 2} {'type': 'loss', 'content': 0.08168403804302216, 'timestamp': '2025-09-10 02:50:11.016220', 'step': 12271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:11.071128', 'step': 12271, 'epoch': 2} {'type': 'loss', 'content': 0.1593175232410431, 'timestamp': '2025-09-10 02:50:11.077223', 'step': 12272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:11.132656', 'step': 12272, 'epoch': 2} {'type': 'loss', 'content': 0.20500846207141876, 'timestamp': '2025-09-10 02:50:11.134883', 'step': 12273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:11.191269', 'step': 12273, 'epoch': 2} {'type': 'loss', 'content': 0.15392358601093292, 'timestamp': '2025-09-10 02:50:11.193666', 'step': 12274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:11.248801', 'step': 12274, 'epoch': 2} {'type': 'loss', 'content': 0.07325760275125504, 'timestamp': '2025-09-10 02:50:11.250997', 'step': 12275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:11.305287', 'step': 12275, 'epoch': 2} {'type': 'loss', 'content': 0.10175083577632904, 'timestamp': '2025-09-10 02:50:11.311488', 'step': 12276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:11.365225', 'step': 12276, 'epoch': 2} {'type': 'loss', 'content': 0.08690129220485687, 'timestamp': '2025-09-10 02:50:11.367581', 'step': 12277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:11.422193', 'step': 12277, 'epoch': 2} {'type': 'loss', 'content': 0.08438718318939209, 'timestamp': '2025-09-10 02:50:11.424609', 'step': 12278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:11.479503', 'step': 12278, 'epoch': 2} {'type': 'loss', 'content': 0.09506799280643463, 'timestamp': '2025-09-10 02:50:11.481579', 'step': 12279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:11.536475', 'step': 12279, 'epoch': 2} {'type': 'loss', 'content': 0.10672426968812943, 'timestamp': '2025-09-10 02:50:11.542514', 'step': 12280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:11.596762', 'step': 12280, 'epoch': 2} {'type': 'loss', 'content': 0.09156901389360428, 'timestamp': '2025-09-10 02:50:11.598927', 'step': 12281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:11.653796', 'step': 12281, 'epoch': 2} {'type': 'loss', 'content': 0.17982180416584015, 'timestamp': '2025-09-10 02:50:11.655897', 'step': 12282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:11.709829', 'step': 12282, 'epoch': 2} {'type': 'loss', 'content': 0.052846089005470276, 'timestamp': '2025-09-10 02:50:11.712500', 'step': 12283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:11.767135', 'step': 12283, 'epoch': 2} {'type': 'loss', 'content': 0.058336272835731506, 'timestamp': '2025-09-10 02:50:11.773173', 'step': 12284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:11.827317', 'step': 12284, 'epoch': 2} {'type': 'loss', 'content': 0.057741232216358185, 'timestamp': '2025-09-10 02:50:11.829556', 'step': 12285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:11.884611', 'step': 12285, 'epoch': 2} {'type': 'loss', 'content': 0.07498026639223099, 'timestamp': '2025-09-10 02:50:11.887149', 'step': 12286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:11.941692', 'step': 12286, 'epoch': 2} {'type': 'loss', 'content': 0.07297009974718094, 'timestamp': '2025-09-10 02:50:11.943889', 'step': 12287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:11.998622', 'step': 12287, 'epoch': 2} {'type': 'loss', 'content': 0.08774162828922272, 'timestamp': '2025-09-10 02:50:12.004853', 'step': 12288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:12.058644', 'step': 12288, 'epoch': 2} {'type': 'loss', 'content': 0.09066005796194077, 'timestamp': '2025-09-10 02:50:12.060981', 'step': 12289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:12.115215', 'step': 12289, 'epoch': 2} {'type': 'loss', 'content': 0.10266713798046112, 'timestamp': '2025-09-10 02:50:12.117488', 'step': 12290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:12.172270', 'step': 12290, 'epoch': 2} {'type': 'loss', 'content': 0.10000824183225632, 'timestamp': '2025-09-10 02:50:12.174481', 'step': 12291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:12.229014', 'step': 12291, 'epoch': 2} {'type': 'loss', 'content': 0.1529425084590912, 'timestamp': '2025-09-10 02:50:12.235923', 'step': 12292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:12.290686', 'step': 12292, 'epoch': 2} {'type': 'loss', 'content': 0.07287915050983429, 'timestamp': '2025-09-10 02:50:12.292904', 'step': 12293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:12.347467', 'step': 12293, 'epoch': 2} {'type': 'loss', 'content': 0.1065540611743927, 'timestamp': '2025-09-10 02:50:12.349654', 'step': 12294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:12.404117', 'step': 12294, 'epoch': 2} {'type': 'loss', 'content': 0.07311075925827026, 'timestamp': '2025-09-10 02:50:12.406410', 'step': 12295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:12.460501', 'step': 12295, 'epoch': 2} {'type': 'loss', 'content': 0.12462156265974045, 'timestamp': '2025-09-10 02:50:12.466590', 'step': 12296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:12.525643', 'step': 12296, 'epoch': 2} {'type': 'loss', 'content': 0.05230562016367912, 'timestamp': '2025-09-10 02:50:12.527855', 'step': 12297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:12.582140', 'step': 12297, 'epoch': 2} {'type': 'loss', 'content': 0.11914142221212387, 'timestamp': '2025-09-10 02:50:12.584471', 'step': 12298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:12.638924', 'step': 12298, 'epoch': 2} {'type': 'loss', 'content': 0.19081862270832062, 'timestamp': '2025-09-10 02:50:12.641009', 'step': 12299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:12.695242', 'step': 12299, 'epoch': 2} {'type': 'loss', 'content': 0.11509162187576294, 'timestamp': '2025-09-10 02:50:12.701341', 'step': 12300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:12.754905', 'step': 12300, 'epoch': 2} {'type': 'loss', 'content': 0.11940258741378784, 'timestamp': '2025-09-10 02:50:12.757127', 'step': 12301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:12.810642', 'step': 12301, 'epoch': 2} {'type': 'loss', 'content': 0.11277651786804199, 'timestamp': '2025-09-10 02:50:12.812809', 'step': 12302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:12.866321', 'step': 12302, 'epoch': 2} {'type': 'loss', 'content': 0.18796804547309875, 'timestamp': '2025-09-10 02:50:12.868685', 'step': 12303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:12.922987', 'step': 12303, 'epoch': 2} {'type': 'loss', 'content': 0.08881412446498871, 'timestamp': '2025-09-10 02:50:12.929173', 'step': 12304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:12.982703', 'step': 12304, 'epoch': 2} {'type': 'loss', 'content': 0.10519318282604218, 'timestamp': '2025-09-10 02:50:12.984981', 'step': 12305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:13.038810', 'step': 12305, 'epoch': 2} {'type': 'loss', 'content': 0.05051971599459648, 'timestamp': '2025-09-10 02:50:13.041016', 'step': 12306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:13.095333', 'step': 12306, 'epoch': 2} {'type': 'loss', 'content': 0.1535911113023758, 'timestamp': '2025-09-10 02:50:13.098443', 'step': 12307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:13.153136', 'step': 12307, 'epoch': 2} {'type': 'loss', 'content': 0.12292281538248062, 'timestamp': '2025-09-10 02:50:13.159121', 'step': 12308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:13.213271', 'step': 12308, 'epoch': 2} {'type': 'loss', 'content': 0.19288785755634308, 'timestamp': '2025-09-10 02:50:13.215594', 'step': 12309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:13.270324', 'step': 12309, 'epoch': 2} {'type': 'loss', 'content': 0.11466081440448761, 'timestamp': '2025-09-10 02:50:13.272569', 'step': 12310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:13.326686', 'step': 12310, 'epoch': 2} {'type': 'loss', 'content': 0.11888901144266129, 'timestamp': '2025-09-10 02:50:13.328838', 'step': 12311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:13.382814', 'step': 12311, 'epoch': 2} {'type': 'loss', 'content': 0.12753866612911224, 'timestamp': '2025-09-10 02:50:13.388418', 'step': 12312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:13.443600', 'step': 12312, 'epoch': 2} {'type': 'loss', 'content': 0.0891706645488739, 'timestamp': '2025-09-10 02:50:13.445975', 'step': 12313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:13.500704', 'step': 12313, 'epoch': 2} {'type': 'loss', 'content': 0.09761504083871841, 'timestamp': '2025-09-10 02:50:13.502794', 'step': 12314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:13.556884', 'step': 12314, 'epoch': 2} {'type': 'loss', 'content': 0.13239075243473053, 'timestamp': '2025-09-10 02:50:13.559072', 'step': 12315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:13.612666', 'step': 12315, 'epoch': 2} {'type': 'loss', 'content': 0.12839174270629883, 'timestamp': '2025-09-10 02:50:13.618674', 'step': 12316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:13.673603', 'step': 12316, 'epoch': 2} {'type': 'loss', 'content': 0.045905884355306625, 'timestamp': '2025-09-10 02:50:13.675918', 'step': 12317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:13.729773', 'step': 12317, 'epoch': 2} {'type': 'loss', 'content': 0.25667089223861694, 'timestamp': '2025-09-10 02:50:13.731927', 'step': 12318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:13.786365', 'step': 12318, 'epoch': 2} {'type': 'loss', 'content': 0.1486039161682129, 'timestamp': '2025-09-10 02:50:13.788511', 'step': 12319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:13.842697', 'step': 12319, 'epoch': 2} {'type': 'loss', 'content': 0.09050192683935165, 'timestamp': '2025-09-10 02:50:13.848439', 'step': 12320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:13.901712', 'step': 12320, 'epoch': 2} {'type': 'loss', 'content': 0.16795013844966888, 'timestamp': '2025-09-10 02:50:13.903897', 'step': 12321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:13.958066', 'step': 12321, 'epoch': 2} {'type': 'loss', 'content': 0.12472869455814362, 'timestamp': '2025-09-10 02:50:13.960189', 'step': 12322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:14.014524', 'step': 12322, 'epoch': 2} {'type': 'loss', 'content': 0.1943308562040329, 'timestamp': '2025-09-10 02:50:14.016834', 'step': 12323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:14.070435', 'step': 12323, 'epoch': 2} {'type': 'loss', 'content': 0.057488083839416504, 'timestamp': '2025-09-10 02:50:14.076336', 'step': 12324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:14.130938', 'step': 12324, 'epoch': 2} {'type': 'loss', 'content': 0.08868499100208282, 'timestamp': '2025-09-10 02:50:14.133232', 'step': 12325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:14.188256', 'step': 12325, 'epoch': 2} {'type': 'loss', 'content': 0.17657431960105896, 'timestamp': '2025-09-10 02:50:14.190432', 'step': 12326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:14.245379', 'step': 12326, 'epoch': 2} {'type': 'loss', 'content': 0.15040981769561768, 'timestamp': '2025-09-10 02:50:14.247592', 'step': 12327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:14.301411', 'step': 12327, 'epoch': 2} {'type': 'loss', 'content': 0.17226386070251465, 'timestamp': '2025-09-10 02:50:14.307391', 'step': 12328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:14.360979', 'step': 12328, 'epoch': 2} {'type': 'loss', 'content': 0.23668387532234192, 'timestamp': '2025-09-10 02:50:14.363136', 'step': 12329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:14.416884', 'step': 12329, 'epoch': 2} {'type': 'loss', 'content': 0.13584093749523163, 'timestamp': '2025-09-10 02:50:14.419102', 'step': 12330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:14.473408', 'step': 12330, 'epoch': 2} {'type': 'loss', 'content': 0.10321037471294403, 'timestamp': '2025-09-10 02:50:14.475839', 'step': 12331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:14.529891', 'step': 12331, 'epoch': 2} {'type': 'loss', 'content': 0.06783141195774078, 'timestamp': '2025-09-10 02:50:14.536152', 'step': 12332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:14.591780', 'step': 12332, 'epoch': 2} {'type': 'loss', 'content': 0.06980803608894348, 'timestamp': '2025-09-10 02:50:14.593960', 'step': 12333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:14.649173', 'step': 12333, 'epoch': 2} {'type': 'loss', 'content': 0.10338378697633743, 'timestamp': '2025-09-10 02:50:14.651292', 'step': 12334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:14.711667', 'step': 12334, 'epoch': 2} {'type': 'loss', 'content': 0.12761731445789337, 'timestamp': '2025-09-10 02:50:14.713878', 'step': 12335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:14.767683', 'step': 12335, 'epoch': 2} {'type': 'loss', 'content': 0.11527945101261139, 'timestamp': '2025-09-10 02:50:14.773666', 'step': 12336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:14.826688', 'step': 12336, 'epoch': 2} {'type': 'loss', 'content': 0.14926546812057495, 'timestamp': '2025-09-10 02:50:14.828902', 'step': 12337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:14.882314', 'step': 12337, 'epoch': 2} {'type': 'loss', 'content': 0.08196704834699631, 'timestamp': '2025-09-10 02:50:14.884596', 'step': 12338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:14.938672', 'step': 12338, 'epoch': 2} {'type': 'loss', 'content': 0.21684834361076355, 'timestamp': '2025-09-10 02:50:14.940723', 'step': 12339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:14.996196', 'step': 12339, 'epoch': 2} {'type': 'loss', 'content': 0.1357928365468979, 'timestamp': '2025-09-10 02:50:15.002555', 'step': 12340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:15.056612', 'step': 12340, 'epoch': 2} {'type': 'loss', 'content': 0.19516007602214813, 'timestamp': '2025-09-10 02:50:15.059117', 'step': 12341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:15.116690', 'step': 12341, 'epoch': 2} {'type': 'loss', 'content': 0.13331440091133118, 'timestamp': '2025-09-10 02:50:15.119313', 'step': 12342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:15.174629', 'step': 12342, 'epoch': 2} {'type': 'loss', 'content': 0.08971104025840759, 'timestamp': '2025-09-10 02:50:15.177098', 'step': 12343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:15.239946', 'step': 12343, 'epoch': 2} {'type': 'loss', 'content': 0.1589382439851761, 'timestamp': '2025-09-10 02:50:15.247397', 'step': 12344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:15.300895', 'step': 12344, 'epoch': 2} {'type': 'loss', 'content': 0.07815594971179962, 'timestamp': '2025-09-10 02:50:15.303432', 'step': 12345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:15.357862', 'step': 12345, 'epoch': 2} {'type': 'loss', 'content': 0.18689283728599548, 'timestamp': '2025-09-10 02:50:15.360193', 'step': 12346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:15.414973', 'step': 12346, 'epoch': 2} {'type': 'loss', 'content': 0.13143892586231232, 'timestamp': '2025-09-10 02:50:15.417331', 'step': 12347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:15.471283', 'step': 12347, 'epoch': 2} {'type': 'loss', 'content': 0.10591090470552444, 'timestamp': '2025-09-10 02:50:15.477381', 'step': 12348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:15.531265', 'step': 12348, 'epoch': 2} {'type': 'loss', 'content': 0.15201450884342194, 'timestamp': '2025-09-10 02:50:15.534034', 'step': 12349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:15.591740', 'step': 12349, 'epoch': 2} {'type': 'loss', 'content': 0.14238713681697845, 'timestamp': '2025-09-10 02:50:15.593928', 'step': 12350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:15.647910', 'step': 12350, 'epoch': 2} {'type': 'loss', 'content': 0.16056133806705475, 'timestamp': '2025-09-10 02:50:15.650441', 'step': 12351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:15.704634', 'step': 12351, 'epoch': 2} {'type': 'loss', 'content': 0.08632597327232361, 'timestamp': '2025-09-10 02:50:15.716529', 'step': 12352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:15.775499', 'step': 12352, 'epoch': 2} {'type': 'loss', 'content': 0.17404857277870178, 'timestamp': '2025-09-10 02:50:15.777906', 'step': 12353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:15.835555', 'step': 12353, 'epoch': 2} {'type': 'loss', 'content': 0.1585768759250641, 'timestamp': '2025-09-10 02:50:15.837880', 'step': 12354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:15.893521', 'step': 12354, 'epoch': 2} {'type': 'loss', 'content': 0.21328146755695343, 'timestamp': '2025-09-10 02:50:15.895813', 'step': 12355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:15.954465', 'step': 12355, 'epoch': 2} {'type': 'loss', 'content': 0.07848786562681198, 'timestamp': '2025-09-10 02:50:15.960287', 'step': 12356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:16.014214', 'step': 12356, 'epoch': 2} {'type': 'loss', 'content': 0.1206459030508995, 'timestamp': '2025-09-10 02:50:16.016543', 'step': 12357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:16.070290', 'step': 12357, 'epoch': 2} {'type': 'loss', 'content': 0.06178072839975357, 'timestamp': '2025-09-10 02:50:16.072505', 'step': 12358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:16.128400', 'step': 12358, 'epoch': 2} {'type': 'loss', 'content': 0.1377752274274826, 'timestamp': '2025-09-10 02:50:16.130944', 'step': 12359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:16.187800', 'step': 12359, 'epoch': 2} {'type': 'loss', 'content': 0.14154289662837982, 'timestamp': '2025-09-10 02:50:16.194751', 'step': 12360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:16.256625', 'step': 12360, 'epoch': 2} {'type': 'loss', 'content': 0.08905799686908722, 'timestamp': '2025-09-10 02:50:16.258888', 'step': 12361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:16.313067', 'step': 12361, 'epoch': 2} {'type': 'loss', 'content': 0.1460629105567932, 'timestamp': '2025-09-10 02:50:16.315231', 'step': 12362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:16.371306', 'step': 12362, 'epoch': 2} {'type': 'loss', 'content': 0.12826646864414215, 'timestamp': '2025-09-10 02:50:16.373627', 'step': 12363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:16.428578', 'step': 12363, 'epoch': 2} {'type': 'loss', 'content': 0.05480530858039856, 'timestamp': '2025-09-10 02:50:16.434980', 'step': 12364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:16.489318', 'step': 12364, 'epoch': 2} {'type': 'loss', 'content': 0.1120738685131073, 'timestamp': '2025-09-10 02:50:16.491595', 'step': 12365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:16.545641', 'step': 12365, 'epoch': 2} {'type': 'loss', 'content': 0.10958413779735565, 'timestamp': '2025-09-10 02:50:16.548020', 'step': 12366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:16.602803', 'step': 12366, 'epoch': 2} {'type': 'loss', 'content': 0.13373331725597382, 'timestamp': '2025-09-10 02:50:16.610001', 'step': 12367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:16.665855', 'step': 12367, 'epoch': 2} {'type': 'loss', 'content': 0.11990446597337723, 'timestamp': '2025-09-10 02:50:16.672375', 'step': 12368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:16.726668', 'step': 12368, 'epoch': 2} {'type': 'loss', 'content': 0.09198624640703201, 'timestamp': '2025-09-10 02:50:16.728933', 'step': 12369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:16.783417', 'step': 12369, 'epoch': 2} {'type': 'loss', 'content': 0.08339011669158936, 'timestamp': '2025-09-10 02:50:16.785807', 'step': 12370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:16.840831', 'step': 12370, 'epoch': 2} {'type': 'loss', 'content': 0.1798858344554901, 'timestamp': '2025-09-10 02:50:16.843349', 'step': 12371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:16.899112', 'step': 12371, 'epoch': 2} {'type': 'loss', 'content': 0.14923688769340515, 'timestamp': '2025-09-10 02:50:16.905370', 'step': 12372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:16.965243', 'step': 12372, 'epoch': 2} {'type': 'loss', 'content': 0.07759717106819153, 'timestamp': '2025-09-10 02:50:16.970089', 'step': 12373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:17.025402', 'step': 12373, 'epoch': 2} {'type': 'loss', 'content': 0.16834142804145813, 'timestamp': '2025-09-10 02:50:17.027869', 'step': 12374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:17.082633', 'step': 12374, 'epoch': 2} {'type': 'loss', 'content': 0.13053907454013824, 'timestamp': '2025-09-10 02:50:17.085090', 'step': 12375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:17.139427', 'step': 12375, 'epoch': 2} {'type': 'loss', 'content': 0.11699000746011734, 'timestamp': '2025-09-10 02:50:17.145679', 'step': 12376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:17.199678', 'step': 12376, 'epoch': 2} {'type': 'loss', 'content': 0.14363659918308258, 'timestamp': '2025-09-10 02:50:17.202081', 'step': 12377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:17.256077', 'step': 12377, 'epoch': 2} {'type': 'loss', 'content': 0.11333408951759338, 'timestamp': '2025-09-10 02:50:17.258362', 'step': 12378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:17.312403', 'step': 12378, 'epoch': 2} {'type': 'loss', 'content': 0.08992095291614532, 'timestamp': '2025-09-10 02:50:17.314737', 'step': 12379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:17.368459', 'step': 12379, 'epoch': 2} {'type': 'loss', 'content': 0.11003129929304123, 'timestamp': '2025-09-10 02:50:17.374671', 'step': 12380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:17.428726', 'step': 12380, 'epoch': 2} {'type': 'loss', 'content': 0.10464553534984589, 'timestamp': '2025-09-10 02:50:17.431084', 'step': 12381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:17.486898', 'step': 12381, 'epoch': 2} {'type': 'loss', 'content': 0.07823771238327026, 'timestamp': '2025-09-10 02:50:17.489301', 'step': 12382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:17.545381', 'step': 12382, 'epoch': 2} {'type': 'loss', 'content': 0.17156946659088135, 'timestamp': '2025-09-10 02:50:17.547854', 'step': 12383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:17.605484', 'step': 12383, 'epoch': 2} {'type': 'loss', 'content': 0.08957868814468384, 'timestamp': '2025-09-10 02:50:17.612319', 'step': 12384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:17.671951', 'step': 12384, 'epoch': 2} {'type': 'loss', 'content': 0.13076743483543396, 'timestamp': '2025-09-10 02:50:17.674480', 'step': 12385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:17.731638', 'step': 12385, 'epoch': 2} {'type': 'loss', 'content': 0.11362790316343307, 'timestamp': '2025-09-10 02:50:17.734016', 'step': 12386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:17.791979', 'step': 12386, 'epoch': 2} {'type': 'loss', 'content': 0.2565358877182007, 'timestamp': '2025-09-10 02:50:17.794347', 'step': 12387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:17.851187', 'step': 12387, 'epoch': 2} {'type': 'loss', 'content': 0.11698883771896362, 'timestamp': '2025-09-10 02:50:17.858104', 'step': 12388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:17.913781', 'step': 12388, 'epoch': 2} {'type': 'loss', 'content': 0.15449413657188416, 'timestamp': '2025-09-10 02:50:17.916243', 'step': 12389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:17.973394', 'step': 12389, 'epoch': 2} {'type': 'loss', 'content': 0.10986969619989395, 'timestamp': '2025-09-10 02:50:17.975740', 'step': 12390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:18.032815', 'step': 12390, 'epoch': 2} {'type': 'loss', 'content': 0.10209941864013672, 'timestamp': '2025-09-10 02:50:18.035187', 'step': 12391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:18.089886', 'step': 12391, 'epoch': 2} {'type': 'loss', 'content': 0.11930260807275772, 'timestamp': '2025-09-10 02:50:18.096362', 'step': 12392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:18.151002', 'step': 12392, 'epoch': 2} {'type': 'loss', 'content': 0.058000266551971436, 'timestamp': '2025-09-10 02:50:18.153394', 'step': 12393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:18.207650', 'step': 12393, 'epoch': 2} {'type': 'loss', 'content': 0.14500512182712555, 'timestamp': '2025-09-10 02:50:18.210110', 'step': 12394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:18.267402', 'step': 12394, 'epoch': 2} {'type': 'loss', 'content': 0.10942307859659195, 'timestamp': '2025-09-10 02:50:18.269803', 'step': 12395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:18.330396', 'step': 12395, 'epoch': 2} {'type': 'loss', 'content': 0.14851248264312744, 'timestamp': '2025-09-10 02:50:18.339078', 'step': 12396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:18.400653', 'step': 12396, 'epoch': 2} {'type': 'loss', 'content': 0.11506646126508713, 'timestamp': '2025-09-10 02:50:18.403114', 'step': 12397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:18.463825', 'step': 12397, 'epoch': 2} {'type': 'loss', 'content': 0.11811482906341553, 'timestamp': '2025-09-10 02:50:18.466371', 'step': 12398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:18.528429', 'step': 12398, 'epoch': 2} {'type': 'loss', 'content': 0.12555795907974243, 'timestamp': '2025-09-10 02:50:18.530890', 'step': 12399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:18.590534', 'step': 12399, 'epoch': 2} {'type': 'loss', 'content': 0.07474341243505478, 'timestamp': '2025-09-10 02:50:18.597593', 'step': 12400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:18.656103', 'step': 12400, 'epoch': 2} {'type': 'loss', 'content': 0.09772687405347824, 'timestamp': '2025-09-10 02:50:18.658462', 'step': 12401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:18.712788', 'step': 12401, 'epoch': 2} {'type': 'loss', 'content': 0.1837674230337143, 'timestamp': '2025-09-10 02:50:18.715434', 'step': 12402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:18.770608', 'step': 12402, 'epoch': 2} {'type': 'loss', 'content': 0.08282451331615448, 'timestamp': '2025-09-10 02:50:18.773130', 'step': 12403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:18.829923', 'step': 12403, 'epoch': 2} {'type': 'loss', 'content': 0.24855078756809235, 'timestamp': '2025-09-10 02:50:18.836239', 'step': 12404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:18.890688', 'step': 12404, 'epoch': 2} {'type': 'loss', 'content': 0.14138998091220856, 'timestamp': '2025-09-10 02:50:18.893108', 'step': 12405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:18.947024', 'step': 12405, 'epoch': 2} {'type': 'loss', 'content': 0.2637956440448761, 'timestamp': '2025-09-10 02:50:18.949359', 'step': 12406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:19.003467', 'step': 12406, 'epoch': 2} {'type': 'loss', 'content': 0.14601828157901764, 'timestamp': '2025-09-10 02:50:19.005815', 'step': 12407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:19.060099', 'step': 12407, 'epoch': 2} {'type': 'loss', 'content': 0.09770841896533966, 'timestamp': '2025-09-10 02:50:19.066264', 'step': 12408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:19.119651', 'step': 12408, 'epoch': 2} {'type': 'loss', 'content': 0.14069314301013947, 'timestamp': '2025-09-10 02:50:19.122092', 'step': 12409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:19.176150', 'step': 12409, 'epoch': 2} {'type': 'loss', 'content': 0.07809063047170639, 'timestamp': '2025-09-10 02:50:19.178497', 'step': 12410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:19.233481', 'step': 12410, 'epoch': 2} {'type': 'loss', 'content': 0.08178498595952988, 'timestamp': '2025-09-10 02:50:19.236032', 'step': 12411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:19.291971', 'step': 12411, 'epoch': 2} {'type': 'loss', 'content': 0.12306354194879532, 'timestamp': '2025-09-10 02:50:19.298072', 'step': 12412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:19.351907', 'step': 12412, 'epoch': 2} {'type': 'loss', 'content': 0.11847870796918869, 'timestamp': '2025-09-10 02:50:19.354195', 'step': 12413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:19.409046', 'step': 12413, 'epoch': 2} {'type': 'loss', 'content': 0.10453663766384125, 'timestamp': '2025-09-10 02:50:19.411428', 'step': 12414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:19.466029', 'step': 12414, 'epoch': 2} {'type': 'loss', 'content': 0.13395890593528748, 'timestamp': '2025-09-10 02:50:19.468419', 'step': 12415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:19.523784', 'step': 12415, 'epoch': 2} {'type': 'loss', 'content': 0.1753644198179245, 'timestamp': '2025-09-10 02:50:19.529830', 'step': 12416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:19.584307', 'step': 12416, 'epoch': 2} {'type': 'loss', 'content': 0.0955473780632019, 'timestamp': '2025-09-10 02:50:19.586773', 'step': 12417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:19.641166', 'step': 12417, 'epoch': 2} {'type': 'loss', 'content': 0.10168711841106415, 'timestamp': '2025-09-10 02:50:19.643673', 'step': 12418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:19.697773', 'step': 12418, 'epoch': 2} {'type': 'loss', 'content': 0.18497978150844574, 'timestamp': '2025-09-10 02:50:19.700077', 'step': 12419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:19.753546', 'step': 12419, 'epoch': 2} {'type': 'loss', 'content': 0.11268410086631775, 'timestamp': '2025-09-10 02:50:19.759616', 'step': 12420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:19.813097', 'step': 12420, 'epoch': 2} {'type': 'loss', 'content': 0.12443626672029495, 'timestamp': '2025-09-10 02:50:19.815343', 'step': 12421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:19.869232', 'step': 12421, 'epoch': 2} {'type': 'loss', 'content': 0.16980619728565216, 'timestamp': '2025-09-10 02:50:19.871535', 'step': 12422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:19.926190', 'step': 12422, 'epoch': 2} {'type': 'loss', 'content': 0.0749821811914444, 'timestamp': '2025-09-10 02:50:19.928455', 'step': 12423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:19.981819', 'step': 12423, 'epoch': 2} {'type': 'loss', 'content': 0.10883790254592896, 'timestamp': '2025-09-10 02:50:19.987875', 'step': 12424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:20.041003', 'step': 12424, 'epoch': 2} {'type': 'loss', 'content': 0.09172044694423676, 'timestamp': '2025-09-10 02:50:20.043367', 'step': 12425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:20.097082', 'step': 12425, 'epoch': 2} {'type': 'loss', 'content': 0.18876862525939941, 'timestamp': '2025-09-10 02:50:20.099447', 'step': 12426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:20.152869', 'step': 12426, 'epoch': 2} {'type': 'loss', 'content': 0.12899434566497803, 'timestamp': '2025-09-10 02:50:20.155138', 'step': 12427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:20.208615', 'step': 12427, 'epoch': 2} {'type': 'loss', 'content': 0.04524419456720352, 'timestamp': '2025-09-10 02:50:20.214474', 'step': 12428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:20.267913', 'step': 12428, 'epoch': 2} {'type': 'loss', 'content': 0.1991795301437378, 'timestamp': '2025-09-10 02:50:20.270452', 'step': 12429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:20.324203', 'step': 12429, 'epoch': 2} {'type': 'loss', 'content': 0.1245269849896431, 'timestamp': '2025-09-10 02:50:20.326490', 'step': 12430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:20.380471', 'step': 12430, 'epoch': 2} {'type': 'loss', 'content': 0.15952523052692413, 'timestamp': '2025-09-10 02:50:20.382919', 'step': 12431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:20.437404', 'step': 12431, 'epoch': 2} {'type': 'loss', 'content': 0.039563242346048355, 'timestamp': '2025-09-10 02:50:20.443614', 'step': 12432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:20.497141', 'step': 12432, 'epoch': 2} {'type': 'loss', 'content': 0.12947945296764374, 'timestamp': '2025-09-10 02:50:20.499462', 'step': 12433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:20.553118', 'step': 12433, 'epoch': 2} {'type': 'loss', 'content': 0.14628712832927704, 'timestamp': '2025-09-10 02:50:20.555437', 'step': 12434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:20.609350', 'step': 12434, 'epoch': 2} {'type': 'loss', 'content': 0.05431428179144859, 'timestamp': '2025-09-10 02:50:20.611629', 'step': 12435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:20.665064', 'step': 12435, 'epoch': 2} {'type': 'loss', 'content': 0.10248979926109314, 'timestamp': '2025-09-10 02:50:20.671083', 'step': 12436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:20.724431', 'step': 12436, 'epoch': 2} {'type': 'loss', 'content': 0.14616312086582184, 'timestamp': '2025-09-10 02:50:20.726624', 'step': 12437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:20.780608', 'step': 12437, 'epoch': 2} {'type': 'loss', 'content': 0.08768026530742645, 'timestamp': '2025-09-10 02:50:20.782943', 'step': 12438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:20.836462', 'step': 12438, 'epoch': 2} {'type': 'loss', 'content': 0.23226875066757202, 'timestamp': '2025-09-10 02:50:20.838857', 'step': 12439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:20.894611', 'step': 12439, 'epoch': 2} {'type': 'loss', 'content': 0.10334011167287827, 'timestamp': '2025-09-10 02:50:20.900409', 'step': 12440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:20.953957', 'step': 12440, 'epoch': 2} {'type': 'loss', 'content': 0.09312313050031662, 'timestamp': '2025-09-10 02:50:20.956304', 'step': 12441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:21.018090', 'step': 12441, 'epoch': 2} {'type': 'loss', 'content': 0.08733028918504715, 'timestamp': '2025-09-10 02:50:21.020520', 'step': 12442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:21.074776', 'step': 12442, 'epoch': 2} {'type': 'loss', 'content': 0.08471569418907166, 'timestamp': '2025-09-10 02:50:21.076876', 'step': 12443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:21.130833', 'step': 12443, 'epoch': 2} {'type': 'loss', 'content': 0.16082869470119476, 'timestamp': '2025-09-10 02:50:21.136922', 'step': 12444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:21.192091', 'step': 12444, 'epoch': 2} {'type': 'loss', 'content': 0.17984473705291748, 'timestamp': '2025-09-10 02:50:21.194502', 'step': 12445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:21.249053', 'step': 12445, 'epoch': 2} {'type': 'loss', 'content': 0.1250745952129364, 'timestamp': '2025-09-10 02:50:21.251434', 'step': 12446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:21.305094', 'step': 12446, 'epoch': 2} {'type': 'loss', 'content': 0.12326640635728836, 'timestamp': '2025-09-10 02:50:21.307167', 'step': 12447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:21.361315', 'step': 12447, 'epoch': 2} {'type': 'loss', 'content': 0.10934377461671829, 'timestamp': '2025-09-10 02:50:21.367158', 'step': 12448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:21.421116', 'step': 12448, 'epoch': 2} {'type': 'loss', 'content': 0.06676758825778961, 'timestamp': '2025-09-10 02:50:21.423408', 'step': 12449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:21.482492', 'step': 12449, 'epoch': 2} {'type': 'loss', 'content': 0.0905771553516388, 'timestamp': '2025-09-10 02:50:21.484593', 'step': 12450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:21.543299', 'step': 12450, 'epoch': 2} {'type': 'loss', 'content': 0.18850654363632202, 'timestamp': '2025-09-10 02:50:21.545490', 'step': 12451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:21.601130', 'step': 12451, 'epoch': 2} {'type': 'loss', 'content': 0.15211065113544464, 'timestamp': '2025-09-10 02:50:21.607086', 'step': 12452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:21.661923', 'step': 12452, 'epoch': 2} {'type': 'loss', 'content': 0.08468445390462875, 'timestamp': '2025-09-10 02:50:21.664297', 'step': 12453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:21.718285', 'step': 12453, 'epoch': 2} {'type': 'loss', 'content': 0.22000525891780853, 'timestamp': '2025-09-10 02:50:21.720560', 'step': 12454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:21.779155', 'step': 12454, 'epoch': 2} {'type': 'loss', 'content': 0.13117066025733948, 'timestamp': '2025-09-10 02:50:21.781369', 'step': 12455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:21.834515', 'step': 12455, 'epoch': 2} {'type': 'loss', 'content': 0.12477825582027435, 'timestamp': '2025-09-10 02:50:21.840403', 'step': 12456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:21.893895', 'step': 12456, 'epoch': 2} {'type': 'loss', 'content': 0.08157357573509216, 'timestamp': '2025-09-10 02:50:21.896062', 'step': 12457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:21.949486', 'step': 12457, 'epoch': 2} {'type': 'loss', 'content': 0.1074066162109375, 'timestamp': '2025-09-10 02:50:21.951841', 'step': 12458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:22.006289', 'step': 12458, 'epoch': 2} {'type': 'loss', 'content': 0.22729206085205078, 'timestamp': '2025-09-10 02:50:22.008474', 'step': 12459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:22.062326', 'step': 12459, 'epoch': 2} {'type': 'loss', 'content': 0.08792204409837723, 'timestamp': '2025-09-10 02:50:22.068260', 'step': 12460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:22.120871', 'step': 12460, 'epoch': 2} {'type': 'loss', 'content': 0.09103401005268097, 'timestamp': '2025-09-10 02:50:22.123382', 'step': 12461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:22.176689', 'step': 12461, 'epoch': 2} {'type': 'loss', 'content': 0.10748233646154404, 'timestamp': '2025-09-10 02:50:22.178937', 'step': 12462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:22.232222', 'step': 12462, 'epoch': 2} {'type': 'loss', 'content': 0.13463549315929413, 'timestamp': '2025-09-10 02:50:22.234308', 'step': 12463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:22.287648', 'step': 12463, 'epoch': 2} {'type': 'loss', 'content': 0.09992220997810364, 'timestamp': '2025-09-10 02:50:22.293402', 'step': 12464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:22.346256', 'step': 12464, 'epoch': 2} {'type': 'loss', 'content': 0.12094231694936752, 'timestamp': '2025-09-10 02:50:22.348305', 'step': 12465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:22.401309', 'step': 12465, 'epoch': 2} {'type': 'loss', 'content': 0.10233402997255325, 'timestamp': '2025-09-10 02:50:22.403603', 'step': 12466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:22.456838', 'step': 12466, 'epoch': 2} {'type': 'loss', 'content': 0.07999216020107269, 'timestamp': '2025-09-10 02:50:22.459077', 'step': 12467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:22.512445', 'step': 12467, 'epoch': 2} {'type': 'loss', 'content': 0.09906456619501114, 'timestamp': '2025-09-10 02:50:22.518517', 'step': 12468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:22.572104', 'step': 12468, 'epoch': 2} {'type': 'loss', 'content': 0.14606742560863495, 'timestamp': '2025-09-10 02:50:22.574481', 'step': 12469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:22.629423', 'step': 12469, 'epoch': 2} {'type': 'loss', 'content': 0.10833236575126648, 'timestamp': '2025-09-10 02:50:22.631720', 'step': 12470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:22.685800', 'step': 12470, 'epoch': 2} {'type': 'loss', 'content': 0.12899529933929443, 'timestamp': '2025-09-10 02:50:22.687985', 'step': 12471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:22.742328', 'step': 12471, 'epoch': 2} {'type': 'loss', 'content': 0.14368751645088196, 'timestamp': '2025-09-10 02:50:22.748317', 'step': 12472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:22.801438', 'step': 12472, 'epoch': 2} {'type': 'loss', 'content': 0.11453288048505783, 'timestamp': '2025-09-10 02:50:22.803851', 'step': 12473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:22.857810', 'step': 12473, 'epoch': 2} {'type': 'loss', 'content': 0.11331605911254883, 'timestamp': '2025-09-10 02:50:22.860682', 'step': 12474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:22.915325', 'step': 12474, 'epoch': 2} {'type': 'loss', 'content': 0.059547536075115204, 'timestamp': '2025-09-10 02:50:22.917629', 'step': 12475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:22.971714', 'step': 12475, 'epoch': 2} {'type': 'loss', 'content': 0.10931943356990814, 'timestamp': '2025-09-10 02:50:22.977685', 'step': 12476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:23.031152', 'step': 12476, 'epoch': 2} {'type': 'loss', 'content': 0.17663905024528503, 'timestamp': '2025-09-10 02:50:23.033364', 'step': 12477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:23.087693', 'step': 12477, 'epoch': 2} {'type': 'loss', 'content': 0.13014532625675201, 'timestamp': '2025-09-10 02:50:23.090000', 'step': 12478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:23.144505', 'step': 12478, 'epoch': 2} {'type': 'loss', 'content': 0.11784171313047409, 'timestamp': '2025-09-10 02:50:23.146959', 'step': 12479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:23.201016', 'step': 12479, 'epoch': 2} {'type': 'loss', 'content': 0.12099359184503555, 'timestamp': '2025-09-10 02:50:23.206877', 'step': 12480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:23.259787', 'step': 12480, 'epoch': 2} {'type': 'loss', 'content': 0.04346204921603203, 'timestamp': '2025-09-10 02:50:23.261851', 'step': 12481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:23.315958', 'step': 12481, 'epoch': 2} {'type': 'loss', 'content': 0.11161082237958908, 'timestamp': '2025-09-10 02:50:23.317910', 'step': 12482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:23.371679', 'step': 12482, 'epoch': 2} {'type': 'loss', 'content': 0.07636658847332001, 'timestamp': '2025-09-10 02:50:23.373550', 'step': 12483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:23.426998', 'step': 12483, 'epoch': 2} {'type': 'loss', 'content': 0.13638976216316223, 'timestamp': '2025-09-10 02:50:23.432556', 'step': 12484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:23.485692', 'step': 12484, 'epoch': 2} {'type': 'loss', 'content': 0.10994826257228851, 'timestamp': '2025-09-10 02:50:23.488033', 'step': 12485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:23.542006', 'step': 12485, 'epoch': 2} {'type': 'loss', 'content': 0.14035674929618835, 'timestamp': '2025-09-10 02:50:23.544394', 'step': 12486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:23.601823', 'step': 12486, 'epoch': 2} {'type': 'loss', 'content': 0.19844666123390198, 'timestamp': '2025-09-10 02:50:23.604167', 'step': 12487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:23.658239', 'step': 12487, 'epoch': 2} {'type': 'loss', 'content': 0.15084044635295868, 'timestamp': '2025-09-10 02:50:23.664449', 'step': 12488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:23.717684', 'step': 12488, 'epoch': 2} {'type': 'loss', 'content': 0.1458197385072708, 'timestamp': '2025-09-10 02:50:23.719765', 'step': 12489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:23.773339', 'step': 12489, 'epoch': 2} {'type': 'loss', 'content': 0.11259754002094269, 'timestamp': '2025-09-10 02:50:23.775297', 'step': 12490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:23.828260', 'step': 12490, 'epoch': 2} {'type': 'loss', 'content': 0.10290922969579697, 'timestamp': '2025-09-10 02:50:23.830449', 'step': 12491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:23.883758', 'step': 12491, 'epoch': 2} {'type': 'loss', 'content': 0.11749215424060822, 'timestamp': '2025-09-10 02:50:23.889238', 'step': 12492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:23.942772', 'step': 12492, 'epoch': 2} {'type': 'loss', 'content': 0.159474715590477, 'timestamp': '2025-09-10 02:50:23.945082', 'step': 12493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:23.998704', 'step': 12493, 'epoch': 2} {'type': 'loss', 'content': 0.13096019625663757, 'timestamp': '2025-09-10 02:50:24.000691', 'step': 12494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:24.055456', 'step': 12494, 'epoch': 2} {'type': 'loss', 'content': 0.08358701318502426, 'timestamp': '2025-09-10 02:50:24.057747', 'step': 12495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:24.111440', 'step': 12495, 'epoch': 2} {'type': 'loss', 'content': 0.25268039107322693, 'timestamp': '2025-09-10 02:50:24.117247', 'step': 12496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:24.171295', 'step': 12496, 'epoch': 2} {'type': 'loss', 'content': 0.10293222963809967, 'timestamp': '2025-09-10 02:50:24.173581', 'step': 12497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:24.226792', 'step': 12497, 'epoch': 2} {'type': 'loss', 'content': 0.13559606671333313, 'timestamp': '2025-09-10 02:50:24.228988', 'step': 12498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:24.283208', 'step': 12498, 'epoch': 2} {'type': 'loss', 'content': 0.20239058136940002, 'timestamp': '2025-09-10 02:50:24.285027', 'step': 12499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:24.339468', 'step': 12499, 'epoch': 2} {'type': 'loss', 'content': 0.16744574904441833, 'timestamp': '2025-09-10 02:50:24.345229', 'step': 12500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 12500', 'timestamp': '2025-09-10 02:50:24.931156', 'step': 12500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:24.991563', 'step': 12500, 'epoch': 2} {'type': 'loss', 'content': 0.13085687160491943, 'timestamp': '2025-09-10 02:50:24.993812', 'step': 12501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:25.048647', 'step': 12501, 'epoch': 2} {'type': 'loss', 'content': 0.1547662615776062, 'timestamp': '2025-09-10 02:50:25.050975', 'step': 12502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:25.104912', 'step': 12502, 'epoch': 2} {'type': 'loss', 'content': 0.13231906294822693, 'timestamp': '2025-09-10 02:50:25.107006', 'step': 12503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:25.160343', 'step': 12503, 'epoch': 2} {'type': 'loss', 'content': 0.1867535561323166, 'timestamp': '2025-09-10 02:50:25.166254', 'step': 12504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:25.220578', 'step': 12504, 'epoch': 2} {'type': 'loss', 'content': 0.14815115928649902, 'timestamp': '2025-09-10 02:50:25.222458', 'step': 12505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:25.276170', 'step': 12505, 'epoch': 2} {'type': 'loss', 'content': 0.11872770637273788, 'timestamp': '2025-09-10 02:50:25.278062', 'step': 12506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:25.334367', 'step': 12506, 'epoch': 2} {'type': 'loss', 'content': 0.17182649672031403, 'timestamp': '2025-09-10 02:50:25.336692', 'step': 12507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:25.390034', 'step': 12507, 'epoch': 2} {'type': 'loss', 'content': 0.114166259765625, 'timestamp': '2025-09-10 02:50:25.396374', 'step': 12508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:25.450516', 'step': 12508, 'epoch': 2} {'type': 'loss', 'content': 0.09971465170383453, 'timestamp': '2025-09-10 02:50:25.458186', 'step': 12509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:25.512810', 'step': 12509, 'epoch': 2} {'type': 'loss', 'content': 0.11665894836187363, 'timestamp': '2025-09-10 02:50:25.521073', 'step': 12510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:25.583067', 'step': 12510, 'epoch': 2} {'type': 'loss', 'content': 0.13989651203155518, 'timestamp': '2025-09-10 02:50:25.589501', 'step': 12511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:25.645312', 'step': 12511, 'epoch': 2} {'type': 'loss', 'content': 0.15585224330425262, 'timestamp': '2025-09-10 02:50:25.651165', 'step': 12512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:25.703852', 'step': 12512, 'epoch': 2} {'type': 'loss', 'content': 0.1355309784412384, 'timestamp': '2025-09-10 02:50:25.706691', 'step': 12513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:25.761587', 'step': 12513, 'epoch': 2} {'type': 'loss', 'content': 0.1167522743344307, 'timestamp': '2025-09-10 02:50:25.763550', 'step': 12514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:25.821096', 'step': 12514, 'epoch': 2} {'type': 'loss', 'content': 0.14343677461147308, 'timestamp': '2025-09-10 02:50:25.823126', 'step': 12515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:25.885468', 'step': 12515, 'epoch': 2} {'type': 'loss', 'content': 0.07175228744745255, 'timestamp': '2025-09-10 02:50:25.891839', 'step': 12516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:25.945499', 'step': 12516, 'epoch': 2} {'type': 'loss', 'content': 0.23927561938762665, 'timestamp': '2025-09-10 02:50:25.947724', 'step': 12517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:26.012978', 'step': 12517, 'epoch': 2} {'type': 'loss', 'content': 0.056414894759655, 'timestamp': '2025-09-10 02:50:26.015173', 'step': 12518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:26.073159', 'step': 12518, 'epoch': 2} {'type': 'loss', 'content': 0.10395907610654831, 'timestamp': '2025-09-10 02:50:26.075311', 'step': 12519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:26.128802', 'step': 12519, 'epoch': 2} {'type': 'loss', 'content': 0.06116858497262001, 'timestamp': '2025-09-10 02:50:26.134433', 'step': 12520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:26.187091', 'step': 12520, 'epoch': 2} {'type': 'loss', 'content': 0.06980317831039429, 'timestamp': '2025-09-10 02:50:26.189154', 'step': 12521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:26.244065', 'step': 12521, 'epoch': 2} {'type': 'loss', 'content': 0.08391391485929489, 'timestamp': '2025-09-10 02:50:26.245948', 'step': 12522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:26.300723', 'step': 12522, 'epoch': 2} {'type': 'loss', 'content': 0.09263713657855988, 'timestamp': '2025-09-10 02:50:26.303048', 'step': 12523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:26.365519', 'step': 12523, 'epoch': 2} {'type': 'loss', 'content': 0.18803690373897552, 'timestamp': '2025-09-10 02:50:26.371654', 'step': 12524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:26.425904', 'step': 12524, 'epoch': 2} {'type': 'loss', 'content': 0.13766838610172272, 'timestamp': '2025-09-10 02:50:26.428112', 'step': 12525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:26.482572', 'step': 12525, 'epoch': 2} {'type': 'loss', 'content': 0.1694665253162384, 'timestamp': '2025-09-10 02:50:26.484500', 'step': 12526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:26.540289', 'step': 12526, 'epoch': 2} {'type': 'loss', 'content': 0.1591939926147461, 'timestamp': '2025-09-10 02:50:26.542409', 'step': 12527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:26.598055', 'step': 12527, 'epoch': 2} {'type': 'loss', 'content': 0.06544690579175949, 'timestamp': '2025-09-10 02:50:26.604154', 'step': 12528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:26.660060', 'step': 12528, 'epoch': 2} {'type': 'loss', 'content': 0.13219791650772095, 'timestamp': '2025-09-10 02:50:26.661964', 'step': 12529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:26.721345', 'step': 12529, 'epoch': 2} {'type': 'loss', 'content': 0.18693625926971436, 'timestamp': '2025-09-10 02:50:26.723173', 'step': 12530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:26.778583', 'step': 12530, 'epoch': 2} {'type': 'loss', 'content': 0.12181442975997925, 'timestamp': '2025-09-10 02:50:26.780576', 'step': 12531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:26.834657', 'step': 12531, 'epoch': 2} {'type': 'loss', 'content': 0.023358257487416267, 'timestamp': '2025-09-10 02:50:26.840579', 'step': 12532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:26.894129', 'step': 12532, 'epoch': 2} {'type': 'loss', 'content': 0.08343282341957092, 'timestamp': '2025-09-10 02:50:26.896331', 'step': 12533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:26.952643', 'step': 12533, 'epoch': 2} {'type': 'loss', 'content': 0.12878450751304626, 'timestamp': '2025-09-10 02:50:26.955062', 'step': 12534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:27.023291', 'step': 12534, 'epoch': 2} {'type': 'loss', 'content': 0.10691490769386292, 'timestamp': '2025-09-10 02:50:27.025381', 'step': 12535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:27.080026', 'step': 12535, 'epoch': 2} {'type': 'loss', 'content': 0.12599299848079681, 'timestamp': '2025-09-10 02:50:27.085891', 'step': 12536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:27.139437', 'step': 12536, 'epoch': 2} {'type': 'loss', 'content': 0.09861937910318375, 'timestamp': '2025-09-10 02:50:27.141580', 'step': 12537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:27.198008', 'step': 12537, 'epoch': 2} {'type': 'loss', 'content': 0.10881906747817993, 'timestamp': '2025-09-10 02:50:27.200137', 'step': 12538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:27.254752', 'step': 12538, 'epoch': 2} {'type': 'loss', 'content': 0.15050633251667023, 'timestamp': '2025-09-10 02:50:27.256967', 'step': 12539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:27.310394', 'step': 12539, 'epoch': 2} {'type': 'loss', 'content': 0.08848138153553009, 'timestamp': '2025-09-10 02:50:27.316256', 'step': 12540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:27.370615', 'step': 12540, 'epoch': 2} {'type': 'loss', 'content': 0.16386599838733673, 'timestamp': '2025-09-10 02:50:27.372551', 'step': 12541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:27.425989', 'step': 12541, 'epoch': 2} {'type': 'loss', 'content': 0.22395825386047363, 'timestamp': '2025-09-10 02:50:27.428226', 'step': 12542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:27.482116', 'step': 12542, 'epoch': 2} {'type': 'loss', 'content': 0.10251546651124954, 'timestamp': '2025-09-10 02:50:27.484452', 'step': 12543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:27.538238', 'step': 12543, 'epoch': 2} {'type': 'loss', 'content': 0.07487457245588303, 'timestamp': '2025-09-10 02:50:27.544076', 'step': 12544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:27.597545', 'step': 12544, 'epoch': 2} {'type': 'loss', 'content': 0.09804900735616684, 'timestamp': '2025-09-10 02:50:27.599768', 'step': 12545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:27.655407', 'step': 12545, 'epoch': 2} {'type': 'loss', 'content': 0.08104822784662247, 'timestamp': '2025-09-10 02:50:27.657470', 'step': 12546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:27.711031', 'step': 12546, 'epoch': 2} {'type': 'loss', 'content': 0.2000540941953659, 'timestamp': '2025-09-10 02:50:27.713402', 'step': 12547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:27.767437', 'step': 12547, 'epoch': 2} {'type': 'loss', 'content': 0.12040477246046066, 'timestamp': '2025-09-10 02:50:27.773466', 'step': 12548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:27.826140', 'step': 12548, 'epoch': 2} {'type': 'loss', 'content': 0.08889992535114288, 'timestamp': '2025-09-10 02:50:27.828367', 'step': 12549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:27.882343', 'step': 12549, 'epoch': 2} {'type': 'loss', 'content': 0.17754493653774261, 'timestamp': '2025-09-10 02:50:27.884634', 'step': 12550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:27.938312', 'step': 12550, 'epoch': 2} {'type': 'loss', 'content': 0.18088673055171967, 'timestamp': '2025-09-10 02:50:27.940618', 'step': 12551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:27.993872', 'step': 12551, 'epoch': 2} {'type': 'loss', 'content': 0.19895590841770172, 'timestamp': '2025-09-10 02:50:28.000060', 'step': 12552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:28.053426', 'step': 12552, 'epoch': 2} {'type': 'loss', 'content': 0.07064255326986313, 'timestamp': '2025-09-10 02:50:28.055717', 'step': 12553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:28.109309', 'step': 12553, 'epoch': 2} {'type': 'loss', 'content': 0.1324586272239685, 'timestamp': '2025-09-10 02:50:28.111608', 'step': 12554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:28.165261', 'step': 12554, 'epoch': 2} {'type': 'loss', 'content': 0.18021896481513977, 'timestamp': '2025-09-10 02:50:28.167595', 'step': 12555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:28.222272', 'step': 12555, 'epoch': 2} {'type': 'loss', 'content': 0.17263305187225342, 'timestamp': '2025-09-10 02:50:28.228496', 'step': 12556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:28.282404', 'step': 12556, 'epoch': 2} {'type': 'loss', 'content': 0.11178022623062134, 'timestamp': '2025-09-10 02:50:28.285177', 'step': 12557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:28.339625', 'step': 12557, 'epoch': 2} {'type': 'loss', 'content': 0.09375003725290298, 'timestamp': '2025-09-10 02:50:28.341966', 'step': 12558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:28.396001', 'step': 12558, 'epoch': 2} {'type': 'loss', 'content': 0.15750961005687714, 'timestamp': '2025-09-10 02:50:28.398197', 'step': 12559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:28.452781', 'step': 12559, 'epoch': 2} {'type': 'loss', 'content': 0.1298520267009735, 'timestamp': '2025-09-10 02:50:28.459056', 'step': 12560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:28.512447', 'step': 12560, 'epoch': 2} {'type': 'loss', 'content': 0.08478160947561264, 'timestamp': '2025-09-10 02:50:28.514729', 'step': 12561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:28.568864', 'step': 12561, 'epoch': 2} {'type': 'loss', 'content': 0.16940632462501526, 'timestamp': '2025-09-10 02:50:28.571001', 'step': 12562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:28.625463', 'step': 12562, 'epoch': 2} {'type': 'loss', 'content': 0.2078421413898468, 'timestamp': '2025-09-10 02:50:28.627737', 'step': 12563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:28.681256', 'step': 12563, 'epoch': 2} {'type': 'loss', 'content': 0.19604454934597015, 'timestamp': '2025-09-10 02:50:28.687334', 'step': 12564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:28.740562', 'step': 12564, 'epoch': 2} {'type': 'loss', 'content': 0.13353125751018524, 'timestamp': '2025-09-10 02:50:28.742519', 'step': 12565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:28.795709', 'step': 12565, 'epoch': 2} {'type': 'loss', 'content': 0.08406169712543488, 'timestamp': '2025-09-10 02:50:28.797796', 'step': 12566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:28.852760', 'step': 12566, 'epoch': 2} {'type': 'loss', 'content': 0.11987164616584778, 'timestamp': '2025-09-10 02:50:28.854886', 'step': 12567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:28.908146', 'step': 12567, 'epoch': 2} {'type': 'loss', 'content': 0.10912896692752838, 'timestamp': '2025-09-10 02:50:28.913965', 'step': 12568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:28.969131', 'step': 12568, 'epoch': 2} {'type': 'loss', 'content': 0.19403311610221863, 'timestamp': '2025-09-10 02:50:28.971297', 'step': 12569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:29.031068', 'step': 12569, 'epoch': 2} {'type': 'loss', 'content': 0.2134735882282257, 'timestamp': '2025-09-10 02:50:29.033224', 'step': 12570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:29.087320', 'step': 12570, 'epoch': 2} {'type': 'loss', 'content': 0.12846721708774567, 'timestamp': '2025-09-10 02:50:29.089385', 'step': 12571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:29.143028', 'step': 12571, 'epoch': 2} {'type': 'loss', 'content': 0.08734790980815887, 'timestamp': '2025-09-10 02:50:29.148930', 'step': 12572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:29.201680', 'step': 12572, 'epoch': 2} {'type': 'loss', 'content': 0.11354990303516388, 'timestamp': '2025-09-10 02:50:29.203820', 'step': 12573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:29.257632', 'step': 12573, 'epoch': 2} {'type': 'loss', 'content': 0.17487354576587677, 'timestamp': '2025-09-10 02:50:29.260004', 'step': 12574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:29.313523', 'step': 12574, 'epoch': 2} {'type': 'loss', 'content': 0.11872132867574692, 'timestamp': '2025-09-10 02:50:29.315959', 'step': 12575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:29.369766', 'step': 12575, 'epoch': 2} {'type': 'loss', 'content': 0.15323194861412048, 'timestamp': '2025-09-10 02:50:29.375542', 'step': 12576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:29.428842', 'step': 12576, 'epoch': 2} {'type': 'loss', 'content': 0.0993262529373169, 'timestamp': '2025-09-10 02:50:29.431132', 'step': 12577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:29.484632', 'step': 12577, 'epoch': 2} {'type': 'loss', 'content': 0.1758211851119995, 'timestamp': '2025-09-10 02:50:29.486968', 'step': 12578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:29.540979', 'step': 12578, 'epoch': 2} {'type': 'loss', 'content': 0.14138846099376678, 'timestamp': '2025-09-10 02:50:29.543227', 'step': 12579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:29.600027', 'step': 12579, 'epoch': 2} {'type': 'loss', 'content': 0.1720927506685257, 'timestamp': '2025-09-10 02:50:29.605931', 'step': 12580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:29.659279', 'step': 12580, 'epoch': 2} {'type': 'loss', 'content': 0.1164257749915123, 'timestamp': '2025-09-10 02:50:29.661388', 'step': 12581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:29.714460', 'step': 12581, 'epoch': 2} {'type': 'loss', 'content': 0.16468577086925507, 'timestamp': '2025-09-10 02:50:29.717688', 'step': 12582, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:50:42.743570', 'step': 12582, 'epoch': 2} {'type': 'pplx', 'content': 14139.419693623064, 'timestamp': '2025-09-10 02:50:42.746786', 'step': 12582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:42.802317', 'step': 12582, 'epoch': 2} {'type': 'loss', 'content': 0.08478466421365738, 'timestamp': '2025-09-10 02:50:42.804688', 'step': 12583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:42.860190', 'step': 12583, 'epoch': 2} {'type': 'loss', 'content': 0.09404836595058441, 'timestamp': '2025-09-10 02:50:42.866789', 'step': 12584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:42.920470', 'step': 12584, 'epoch': 2} {'type': 'loss', 'content': 0.10903695225715637, 'timestamp': '2025-09-10 02:50:42.922694', 'step': 12585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:42.976941', 'step': 12585, 'epoch': 2} {'type': 'loss', 'content': 0.1326475888490677, 'timestamp': '2025-09-10 02:50:42.979286', 'step': 12586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:43.034638', 'step': 12586, 'epoch': 2} {'type': 'loss', 'content': 0.06375806778669357, 'timestamp': '2025-09-10 02:50:43.036910', 'step': 12587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:43.091676', 'step': 12587, 'epoch': 2} {'type': 'loss', 'content': 0.1788010448217392, 'timestamp': '2025-09-10 02:50:43.097446', 'step': 12588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:43.151698', 'step': 12588, 'epoch': 2} {'type': 'loss', 'content': 0.1369977742433548, 'timestamp': '2025-09-10 02:50:43.154210', 'step': 12589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:43.208620', 'step': 12589, 'epoch': 2} {'type': 'loss', 'content': 0.13127443194389343, 'timestamp': '2025-09-10 02:50:43.211051', 'step': 12590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:43.266445', 'step': 12590, 'epoch': 2} {'type': 'loss', 'content': 0.06262937933206558, 'timestamp': '2025-09-10 02:50:43.268902', 'step': 12591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:43.323361', 'step': 12591, 'epoch': 2} {'type': 'loss', 'content': 0.0910891443490982, 'timestamp': '2025-09-10 02:50:43.329885', 'step': 12592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:43.384588', 'step': 12592, 'epoch': 2} {'type': 'loss', 'content': 0.10960140824317932, 'timestamp': '2025-09-10 02:50:43.387049', 'step': 12593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:43.441102', 'step': 12593, 'epoch': 2} {'type': 'loss', 'content': 0.1025635376572609, 'timestamp': '2025-09-10 02:50:43.443598', 'step': 12594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:43.498276', 'step': 12594, 'epoch': 2} {'type': 'loss', 'content': 0.15755493938922882, 'timestamp': '2025-09-10 02:50:43.500712', 'step': 12595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:43.554254', 'step': 12595, 'epoch': 2} {'type': 'loss', 'content': 0.1485985368490219, 'timestamp': '2025-09-10 02:50:43.560776', 'step': 12596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:43.614740', 'step': 12596, 'epoch': 2} {'type': 'loss', 'content': 0.10325273126363754, 'timestamp': '2025-09-10 02:50:43.617300', 'step': 12597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:43.674722', 'step': 12597, 'epoch': 2} {'type': 'loss', 'content': 0.188228577375412, 'timestamp': '2025-09-10 02:50:43.677443', 'step': 12598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:43.734534', 'step': 12598, 'epoch': 2} {'type': 'loss', 'content': 0.20829367637634277, 'timestamp': '2025-09-10 02:50:43.737032', 'step': 12599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:43.792718', 'step': 12599, 'epoch': 2} {'type': 'loss', 'content': 0.1283664107322693, 'timestamp': '2025-09-10 02:50:43.799339', 'step': 12600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:43.854324', 'step': 12600, 'epoch': 2} {'type': 'loss', 'content': 0.13308021426200867, 'timestamp': '2025-09-10 02:50:43.856701', 'step': 12601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:43.913752', 'step': 12601, 'epoch': 2} {'type': 'loss', 'content': 0.1290842890739441, 'timestamp': '2025-09-10 02:50:43.916325', 'step': 12602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:43.971064', 'step': 12602, 'epoch': 2} {'type': 'loss', 'content': 0.08661790192127228, 'timestamp': '2025-09-10 02:50:43.973474', 'step': 12603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:44.026977', 'step': 12603, 'epoch': 2} {'type': 'loss', 'content': 0.09857120364904404, 'timestamp': '2025-09-10 02:50:44.033330', 'step': 12604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:44.096035', 'step': 12604, 'epoch': 2} {'type': 'loss', 'content': 0.09203638881444931, 'timestamp': '2025-09-10 02:50:44.098591', 'step': 12605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:44.155636', 'step': 12605, 'epoch': 2} {'type': 'loss', 'content': 0.11657930910587311, 'timestamp': '2025-09-10 02:50:44.158128', 'step': 12606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:44.218172', 'step': 12606, 'epoch': 2} {'type': 'loss', 'content': 0.09578645974397659, 'timestamp': '2025-09-10 02:50:44.220713', 'step': 12607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:44.278762', 'step': 12607, 'epoch': 2} {'type': 'loss', 'content': 0.18129195272922516, 'timestamp': '2025-09-10 02:50:44.285400', 'step': 12608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:44.343470', 'step': 12608, 'epoch': 2} {'type': 'loss', 'content': 0.02101375162601471, 'timestamp': '2025-09-10 02:50:44.345889', 'step': 12609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:44.400774', 'step': 12609, 'epoch': 2} {'type': 'loss', 'content': 0.1618911623954773, 'timestamp': '2025-09-10 02:50:44.403293', 'step': 12610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:44.457323', 'step': 12610, 'epoch': 2} {'type': 'loss', 'content': 0.06943076103925705, 'timestamp': '2025-09-10 02:50:44.459875', 'step': 12611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:44.514564', 'step': 12611, 'epoch': 2} {'type': 'loss', 'content': 0.12043653428554535, 'timestamp': '2025-09-10 02:50:44.521216', 'step': 12612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:44.575842', 'step': 12612, 'epoch': 2} {'type': 'loss', 'content': 0.1304091066122055, 'timestamp': '2025-09-10 02:50:44.578566', 'step': 12613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:44.635543', 'step': 12613, 'epoch': 2} {'type': 'loss', 'content': 0.12990325689315796, 'timestamp': '2025-09-10 02:50:44.637983', 'step': 12614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:44.694367', 'step': 12614, 'epoch': 2} {'type': 'loss', 'content': 0.12712176144123077, 'timestamp': '2025-09-10 02:50:44.696804', 'step': 12615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:44.751573', 'step': 12615, 'epoch': 2} {'type': 'loss', 'content': 0.0883934423327446, 'timestamp': '2025-09-10 02:50:44.758475', 'step': 12616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:44.814681', 'step': 12616, 'epoch': 2} {'type': 'loss', 'content': 0.10525951534509659, 'timestamp': '2025-09-10 02:50:44.822628', 'step': 12617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:44.882650', 'step': 12617, 'epoch': 2} {'type': 'loss', 'content': 0.10474070906639099, 'timestamp': '2025-09-10 02:50:44.886999', 'step': 12618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:44.943454', 'step': 12618, 'epoch': 2} {'type': 'loss', 'content': 0.113088458776474, 'timestamp': '2025-09-10 02:50:44.945901', 'step': 12619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:45.000082', 'step': 12619, 'epoch': 2} {'type': 'loss', 'content': 0.1544893980026245, 'timestamp': '2025-09-10 02:50:45.006588', 'step': 12620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:45.061105', 'step': 12620, 'epoch': 2} {'type': 'loss', 'content': 0.06588493287563324, 'timestamp': '2025-09-10 02:50:45.063646', 'step': 12621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:45.117174', 'step': 12621, 'epoch': 2} {'type': 'loss', 'content': 0.11738543957471848, 'timestamp': '2025-09-10 02:50:45.119690', 'step': 12622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:45.173749', 'step': 12622, 'epoch': 2} {'type': 'loss', 'content': 0.16439774632453918, 'timestamp': '2025-09-10 02:50:45.177490', 'step': 12623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:45.232835', 'step': 12623, 'epoch': 2} {'type': 'loss', 'content': 0.14279146492481232, 'timestamp': '2025-09-10 02:50:45.239636', 'step': 12624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:45.293569', 'step': 12624, 'epoch': 2} {'type': 'loss', 'content': 0.16340559720993042, 'timestamp': '2025-09-10 02:50:45.295905', 'step': 12625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:45.351077', 'step': 12625, 'epoch': 2} {'type': 'loss', 'content': 0.1054714098572731, 'timestamp': '2025-09-10 02:50:45.353695', 'step': 12626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:45.409295', 'step': 12626, 'epoch': 2} {'type': 'loss', 'content': 0.14839985966682434, 'timestamp': '2025-09-10 02:50:45.412309', 'step': 12627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:45.467314', 'step': 12627, 'epoch': 2} {'type': 'loss', 'content': 0.08164183050394058, 'timestamp': '2025-09-10 02:50:45.474736', 'step': 12628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:45.530805', 'step': 12628, 'epoch': 2} {'type': 'loss', 'content': 0.2184317409992218, 'timestamp': '2025-09-10 02:50:45.533415', 'step': 12629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:45.590718', 'step': 12629, 'epoch': 2} {'type': 'loss', 'content': 0.12542220950126648, 'timestamp': '2025-09-10 02:50:45.593198', 'step': 12630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:45.647080', 'step': 12630, 'epoch': 2} {'type': 'loss', 'content': 0.18167048692703247, 'timestamp': '2025-09-10 02:50:45.649444', 'step': 12631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:45.704262', 'step': 12631, 'epoch': 2} {'type': 'loss', 'content': 0.15905393660068512, 'timestamp': '2025-09-10 02:50:45.711022', 'step': 12632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:45.768801', 'step': 12632, 'epoch': 2} {'type': 'loss', 'content': 0.05875621363520622, 'timestamp': '2025-09-10 02:50:45.771970', 'step': 12633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:45.827241', 'step': 12633, 'epoch': 2} {'type': 'loss', 'content': 0.11598481237888336, 'timestamp': '2025-09-10 02:50:45.829586', 'step': 12634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:45.888730', 'step': 12634, 'epoch': 2} {'type': 'loss', 'content': 0.13016749918460846, 'timestamp': '2025-09-10 02:50:45.894309', 'step': 12635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:45.952442', 'step': 12635, 'epoch': 2} {'type': 'loss', 'content': 0.06192515045404434, 'timestamp': '2025-09-10 02:50:45.958939', 'step': 12636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:46.013227', 'step': 12636, 'epoch': 2} {'type': 'loss', 'content': 0.1019289493560791, 'timestamp': '2025-09-10 02:50:46.020687', 'step': 12637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:46.075834', 'step': 12637, 'epoch': 2} {'type': 'loss', 'content': 0.04646306112408638, 'timestamp': '2025-09-10 02:50:46.078126', 'step': 12638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:46.132291', 'step': 12638, 'epoch': 2} {'type': 'loss', 'content': 0.10582400113344193, 'timestamp': '2025-09-10 02:50:46.134593', 'step': 12639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:46.191930', 'step': 12639, 'epoch': 2} {'type': 'loss', 'content': 0.13894647359848022, 'timestamp': '2025-09-10 02:50:46.198318', 'step': 12640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:46.252006', 'step': 12640, 'epoch': 2} {'type': 'loss', 'content': 0.12146732211112976, 'timestamp': '2025-09-10 02:50:46.255585', 'step': 12641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:46.314674', 'step': 12641, 'epoch': 2} {'type': 'loss', 'content': 0.13898025453090668, 'timestamp': '2025-09-10 02:50:46.318136', 'step': 12642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:46.374148', 'step': 12642, 'epoch': 2} {'type': 'loss', 'content': 0.11244087666273117, 'timestamp': '2025-09-10 02:50:46.376569', 'step': 12643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:46.430547', 'step': 12643, 'epoch': 2} {'type': 'loss', 'content': 0.1500273197889328, 'timestamp': '2025-09-10 02:50:46.436668', 'step': 12644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:46.488922', 'step': 12644, 'epoch': 2} {'type': 'loss', 'content': 0.16374525427818298, 'timestamp': '2025-09-10 02:50:46.490960', 'step': 12645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:46.543965', 'step': 12645, 'epoch': 2} {'type': 'loss', 'content': 0.13739165663719177, 'timestamp': '2025-09-10 02:50:46.547393', 'step': 12646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:46.600897', 'step': 12646, 'epoch': 2} {'type': 'loss', 'content': 0.09509091824293137, 'timestamp': '2025-09-10 02:50:46.603235', 'step': 12647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:46.656766', 'step': 12647, 'epoch': 2} {'type': 'loss', 'content': 0.18295466899871826, 'timestamp': '2025-09-10 02:50:46.662905', 'step': 12648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:46.716295', 'step': 12648, 'epoch': 2} {'type': 'loss', 'content': 0.1665981560945511, 'timestamp': '2025-09-10 02:50:46.721116', 'step': 12649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:46.777823', 'step': 12649, 'epoch': 2} {'type': 'loss', 'content': 0.11202258616685867, 'timestamp': '2025-09-10 02:50:46.780316', 'step': 12650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:46.835571', 'step': 12650, 'epoch': 2} {'type': 'loss', 'content': 0.10767603665590286, 'timestamp': '2025-09-10 02:50:46.837674', 'step': 12651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:46.890460', 'step': 12651, 'epoch': 2} {'type': 'loss', 'content': 0.14877218008041382, 'timestamp': '2025-09-10 02:50:46.896525', 'step': 12652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:46.956870', 'step': 12652, 'epoch': 2} {'type': 'loss', 'content': 0.10269318521022797, 'timestamp': '2025-09-10 02:50:46.959183', 'step': 12653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:47.012692', 'step': 12653, 'epoch': 2} {'type': 'loss', 'content': 0.11252900213003159, 'timestamp': '2025-09-10 02:50:47.016829', 'step': 12654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:47.070913', 'step': 12654, 'epoch': 2} {'type': 'loss', 'content': 0.11797735095024109, 'timestamp': '2025-09-10 02:50:47.073296', 'step': 12655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:47.127014', 'step': 12655, 'epoch': 2} {'type': 'loss', 'content': 0.1061999648809433, 'timestamp': '2025-09-10 02:50:47.134187', 'step': 12656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:47.191410', 'step': 12656, 'epoch': 2} {'type': 'loss', 'content': 0.20328868925571442, 'timestamp': '2025-09-10 02:50:47.193616', 'step': 12657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:47.246587', 'step': 12657, 'epoch': 2} {'type': 'loss', 'content': 0.1437583565711975, 'timestamp': '2025-09-10 02:50:47.248915', 'step': 12658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:47.302213', 'step': 12658, 'epoch': 2} {'type': 'loss', 'content': 0.10742524266242981, 'timestamp': '2025-09-10 02:50:47.304467', 'step': 12659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:47.359290', 'step': 12659, 'epoch': 2} {'type': 'loss', 'content': 0.1883363425731659, 'timestamp': '2025-09-10 02:50:47.365244', 'step': 12660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:47.419311', 'step': 12660, 'epoch': 2} {'type': 'loss', 'content': 0.2090405374765396, 'timestamp': '2025-09-10 02:50:47.421407', 'step': 12661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:47.476139', 'step': 12661, 'epoch': 2} {'type': 'loss', 'content': 0.14847880601882935, 'timestamp': '2025-09-10 02:50:47.478431', 'step': 12662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:47.532649', 'step': 12662, 'epoch': 2} {'type': 'loss', 'content': 0.12804563343524933, 'timestamp': '2025-09-10 02:50:47.534956', 'step': 12663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:47.588441', 'step': 12663, 'epoch': 2} {'type': 'loss', 'content': 0.11364813148975372, 'timestamp': '2025-09-10 02:50:47.594348', 'step': 12664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:47.647943', 'step': 12664, 'epoch': 2} {'type': 'loss', 'content': 0.0776672437787056, 'timestamp': '2025-09-10 02:50:47.650320', 'step': 12665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:47.703395', 'step': 12665, 'epoch': 2} {'type': 'loss', 'content': 0.08285602927207947, 'timestamp': '2025-09-10 02:50:47.705534', 'step': 12666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:47.758464', 'step': 12666, 'epoch': 2} {'type': 'loss', 'content': 0.13632452487945557, 'timestamp': '2025-09-10 02:50:47.760819', 'step': 12667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:47.815851', 'step': 12667, 'epoch': 2} {'type': 'loss', 'content': 0.1287413239479065, 'timestamp': '2025-09-10 02:50:47.821966', 'step': 12668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:47.875232', 'step': 12668, 'epoch': 2} {'type': 'loss', 'content': 0.15393103659152985, 'timestamp': '2025-09-10 02:50:47.877700', 'step': 12669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:47.937739', 'step': 12669, 'epoch': 2} {'type': 'loss', 'content': 0.12390688061714172, 'timestamp': '2025-09-10 02:50:47.940361', 'step': 12670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:47.993473', 'step': 12670, 'epoch': 2} {'type': 'loss', 'content': 0.06432729214429855, 'timestamp': '2025-09-10 02:50:47.995832', 'step': 12671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:48.048947', 'step': 12671, 'epoch': 2} {'type': 'loss', 'content': 0.12712745368480682, 'timestamp': '2025-09-10 02:50:48.055068', 'step': 12672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:48.107894', 'step': 12672, 'epoch': 2} {'type': 'loss', 'content': 0.07627750188112259, 'timestamp': '2025-09-10 02:50:48.110364', 'step': 12673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:48.162990', 'step': 12673, 'epoch': 2} {'type': 'loss', 'content': 0.17565718293190002, 'timestamp': '2025-09-10 02:50:48.165357', 'step': 12674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:48.218638', 'step': 12674, 'epoch': 2} {'type': 'loss', 'content': 0.08617440611124039, 'timestamp': '2025-09-10 02:50:48.220792', 'step': 12675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:48.274475', 'step': 12675, 'epoch': 2} {'type': 'loss', 'content': 0.1274004429578781, 'timestamp': '2025-09-10 02:50:48.280444', 'step': 12676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:48.332977', 'step': 12676, 'epoch': 2} {'type': 'loss', 'content': 0.0880008190870285, 'timestamp': '2025-09-10 02:50:48.335471', 'step': 12677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:48.389153', 'step': 12677, 'epoch': 2} {'type': 'loss', 'content': 0.11769649386405945, 'timestamp': '2025-09-10 02:50:48.391311', 'step': 12678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:48.447723', 'step': 12678, 'epoch': 2} {'type': 'loss', 'content': 0.09850854426622391, 'timestamp': '2025-09-10 02:50:48.449848', 'step': 12679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:48.506056', 'step': 12679, 'epoch': 2} {'type': 'loss', 'content': 0.13436153531074524, 'timestamp': '2025-09-10 02:50:48.512290', 'step': 12680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:48.565800', 'step': 12680, 'epoch': 2} {'type': 'loss', 'content': 0.21077118813991547, 'timestamp': '2025-09-10 02:50:48.567886', 'step': 12681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:48.625081', 'step': 12681, 'epoch': 2} {'type': 'loss', 'content': 0.19473031163215637, 'timestamp': '2025-09-10 02:50:48.627195', 'step': 12682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:48.681522', 'step': 12682, 'epoch': 2} {'type': 'loss', 'content': 0.06830234080553055, 'timestamp': '2025-09-10 02:50:48.683852', 'step': 12683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:48.737410', 'step': 12683, 'epoch': 2} {'type': 'loss', 'content': 0.10645532608032227, 'timestamp': '2025-09-10 02:50:48.743560', 'step': 12684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:48.797235', 'step': 12684, 'epoch': 2} {'type': 'loss', 'content': 0.08097772300243378, 'timestamp': '2025-09-10 02:50:48.799423', 'step': 12685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:48.855148', 'step': 12685, 'epoch': 2} {'type': 'loss', 'content': 0.08670870214700699, 'timestamp': '2025-09-10 02:50:48.857507', 'step': 12686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:48.913207', 'step': 12686, 'epoch': 2} {'type': 'loss', 'content': 0.14221462607383728, 'timestamp': '2025-09-10 02:50:48.915499', 'step': 12687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:48.971664', 'step': 12687, 'epoch': 2} {'type': 'loss', 'content': 0.06909342855215073, 'timestamp': '2025-09-10 02:50:48.978071', 'step': 12688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:49.039495', 'step': 12688, 'epoch': 2} {'type': 'loss', 'content': 0.1677352637052536, 'timestamp': '2025-09-10 02:50:49.041687', 'step': 12689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:49.094491', 'step': 12689, 'epoch': 2} {'type': 'loss', 'content': 0.13680779933929443, 'timestamp': '2025-09-10 02:50:49.096645', 'step': 12690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:49.149851', 'step': 12690, 'epoch': 2} {'type': 'loss', 'content': 0.038818683475255966, 'timestamp': '2025-09-10 02:50:49.152372', 'step': 12691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:49.206306', 'step': 12691, 'epoch': 2} {'type': 'loss', 'content': 0.11042878031730652, 'timestamp': '2025-09-10 02:50:49.212512', 'step': 12692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:49.271400', 'step': 12692, 'epoch': 2} {'type': 'loss', 'content': 0.12033703178167343, 'timestamp': '2025-09-10 02:50:49.273478', 'step': 12693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:49.335067', 'step': 12693, 'epoch': 2} {'type': 'loss', 'content': 0.19171172380447388, 'timestamp': '2025-09-10 02:50:49.337355', 'step': 12694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:49.395846', 'step': 12694, 'epoch': 2} {'type': 'loss', 'content': 0.03071933053433895, 'timestamp': '2025-09-10 02:50:49.398167', 'step': 12695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:49.451430', 'step': 12695, 'epoch': 2} {'type': 'loss', 'content': 0.10197757929563522, 'timestamp': '2025-09-10 02:50:49.457483', 'step': 12696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:49.513151', 'step': 12696, 'epoch': 2} {'type': 'loss', 'content': 0.07912188023328781, 'timestamp': '2025-09-10 02:50:49.515351', 'step': 12697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:49.568865', 'step': 12697, 'epoch': 2} {'type': 'loss', 'content': 0.18748171627521515, 'timestamp': '2025-09-10 02:50:49.571085', 'step': 12698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:49.625452', 'step': 12698, 'epoch': 2} {'type': 'loss', 'content': 0.09858959913253784, 'timestamp': '2025-09-10 02:50:49.627863', 'step': 12699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:49.687859', 'step': 12699, 'epoch': 2} {'type': 'loss', 'content': 0.1853513866662979, 'timestamp': '2025-09-10 02:50:49.694145', 'step': 12700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:49.749511', 'step': 12700, 'epoch': 2} {'type': 'loss', 'content': 0.12247016280889511, 'timestamp': '2025-09-10 02:50:49.751670', 'step': 12701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:49.808364', 'step': 12701, 'epoch': 2} {'type': 'loss', 'content': 0.10004192590713501, 'timestamp': '2025-09-10 02:50:49.810683', 'step': 12702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:49.867076', 'step': 12702, 'epoch': 2} {'type': 'loss', 'content': 0.08796755224466324, 'timestamp': '2025-09-10 02:50:49.869194', 'step': 12703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:49.923460', 'step': 12703, 'epoch': 2} {'type': 'loss', 'content': 0.13290004432201385, 'timestamp': '2025-09-10 02:50:49.929631', 'step': 12704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:49.982982', 'step': 12704, 'epoch': 2} {'type': 'loss', 'content': 0.045504506677389145, 'timestamp': '2025-09-10 02:50:49.985112', 'step': 12705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:50.039407', 'step': 12705, 'epoch': 2} {'type': 'loss', 'content': 0.11886464059352875, 'timestamp': '2025-09-10 02:50:50.041595', 'step': 12706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:50.100457', 'step': 12706, 'epoch': 2} {'type': 'loss', 'content': 0.11063637584447861, 'timestamp': '2025-09-10 02:50:50.102662', 'step': 12707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:50.156228', 'step': 12707, 'epoch': 2} {'type': 'loss', 'content': 0.09896876662969589, 'timestamp': '2025-09-10 02:50:50.162401', 'step': 12708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:50.217920', 'step': 12708, 'epoch': 2} {'type': 'loss', 'content': 0.0940529927611351, 'timestamp': '2025-09-10 02:50:50.220395', 'step': 12709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:50.274026', 'step': 12709, 'epoch': 2} {'type': 'loss', 'content': 0.1632733941078186, 'timestamp': '2025-09-10 02:50:50.276192', 'step': 12710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:50.331046', 'step': 12710, 'epoch': 2} {'type': 'loss', 'content': 0.09013522416353226, 'timestamp': '2025-09-10 02:50:50.333294', 'step': 12711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:50.388089', 'step': 12711, 'epoch': 2} {'type': 'loss', 'content': 0.16125257313251495, 'timestamp': '2025-09-10 02:50:50.394265', 'step': 12712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:50.450845', 'step': 12712, 'epoch': 2} {'type': 'loss', 'content': 0.14536608755588531, 'timestamp': '2025-09-10 02:50:50.453209', 'step': 12713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:50.510079', 'step': 12713, 'epoch': 2} {'type': 'loss', 'content': 0.16242580115795135, 'timestamp': '2025-09-10 02:50:50.512345', 'step': 12714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:50.568299', 'step': 12714, 'epoch': 2} {'type': 'loss', 'content': 0.1608494520187378, 'timestamp': '2025-09-10 02:50:50.570450', 'step': 12715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:50.627003', 'step': 12715, 'epoch': 2} {'type': 'loss', 'content': 0.12456820160150528, 'timestamp': '2025-09-10 02:50:50.633431', 'step': 12716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:50.687975', 'step': 12716, 'epoch': 2} {'type': 'loss', 'content': 0.1640416383743286, 'timestamp': '2025-09-10 02:50:50.690158', 'step': 12717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:50.743843', 'step': 12717, 'epoch': 2} {'type': 'loss', 'content': 0.09360336512327194, 'timestamp': '2025-09-10 02:50:50.746003', 'step': 12718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:50.800698', 'step': 12718, 'epoch': 2} {'type': 'loss', 'content': 0.05075380951166153, 'timestamp': '2025-09-10 02:50:50.802710', 'step': 12719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:50.857588', 'step': 12719, 'epoch': 2} {'type': 'loss', 'content': 0.07048068195581436, 'timestamp': '2025-09-10 02:50:50.863649', 'step': 12720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:50.916912', 'step': 12720, 'epoch': 2} {'type': 'loss', 'content': 0.08805430680513382, 'timestamp': '2025-09-10 02:50:50.918894', 'step': 12721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:50.972562', 'step': 12721, 'epoch': 2} {'type': 'loss', 'content': 0.06424111872911453, 'timestamp': '2025-09-10 02:50:50.975119', 'step': 12722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:51.028655', 'step': 12722, 'epoch': 2} {'type': 'loss', 'content': 0.0627705529332161, 'timestamp': '2025-09-10 02:50:51.031075', 'step': 12723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:51.084519', 'step': 12723, 'epoch': 2} {'type': 'loss', 'content': 0.08651809394359589, 'timestamp': '2025-09-10 02:50:51.090747', 'step': 12724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:51.143937', 'step': 12724, 'epoch': 2} {'type': 'loss', 'content': 0.21676665544509888, 'timestamp': '2025-09-10 02:50:51.146264', 'step': 12725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:51.199947', 'step': 12725, 'epoch': 2} {'type': 'loss', 'content': 0.14220848679542542, 'timestamp': '2025-09-10 02:50:51.202063', 'step': 12726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:51.256580', 'step': 12726, 'epoch': 2} {'type': 'loss', 'content': 0.1060071587562561, 'timestamp': '2025-09-10 02:50:51.258669', 'step': 12727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:51.313683', 'step': 12727, 'epoch': 2} {'type': 'loss', 'content': 0.04616837948560715, 'timestamp': '2025-09-10 02:50:51.319969', 'step': 12728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:51.373983', 'step': 12728, 'epoch': 2} {'type': 'loss', 'content': 0.14765489101409912, 'timestamp': '2025-09-10 02:50:51.376184', 'step': 12729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:51.431053', 'step': 12729, 'epoch': 2} {'type': 'loss', 'content': 0.11243890970945358, 'timestamp': '2025-09-10 02:50:51.433295', 'step': 12730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:51.486983', 'step': 12730, 'epoch': 2} {'type': 'loss', 'content': 0.14175190031528473, 'timestamp': '2025-09-10 02:50:51.489435', 'step': 12731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:51.543171', 'step': 12731, 'epoch': 2} {'type': 'loss', 'content': 0.06699123978614807, 'timestamp': '2025-09-10 02:50:51.549556', 'step': 12732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:51.602540', 'step': 12732, 'epoch': 2} {'type': 'loss', 'content': 0.17151975631713867, 'timestamp': '2025-09-10 02:50:51.604682', 'step': 12733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:51.658793', 'step': 12733, 'epoch': 2} {'type': 'loss', 'content': 0.2230129987001419, 'timestamp': '2025-09-10 02:50:51.662766', 'step': 12734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:51.716597', 'step': 12734, 'epoch': 2} {'type': 'loss', 'content': 0.1996641755104065, 'timestamp': '2025-09-10 02:50:51.718961', 'step': 12735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:51.773080', 'step': 12735, 'epoch': 2} {'type': 'loss', 'content': 0.09796985983848572, 'timestamp': '2025-09-10 02:50:51.779432', 'step': 12736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:51.833224', 'step': 12736, 'epoch': 2} {'type': 'loss', 'content': 0.060212843120098114, 'timestamp': '2025-09-10 02:50:51.836068', 'step': 12737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:51.891464', 'step': 12737, 'epoch': 2} {'type': 'loss', 'content': 0.11818507313728333, 'timestamp': '2025-09-10 02:50:51.893349', 'step': 12738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:51.947862', 'step': 12738, 'epoch': 2} {'type': 'loss', 'content': 0.22213232517242432, 'timestamp': '2025-09-10 02:50:51.950070', 'step': 12739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:52.004507', 'step': 12739, 'epoch': 2} {'type': 'loss', 'content': 0.07739688456058502, 'timestamp': '2025-09-10 02:50:52.010452', 'step': 12740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:52.064051', 'step': 12740, 'epoch': 2} {'type': 'loss', 'content': 0.15868324041366577, 'timestamp': '2025-09-10 02:50:52.066566', 'step': 12741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:52.123712', 'step': 12741, 'epoch': 2} {'type': 'loss', 'content': 0.1708824783563614, 'timestamp': '2025-09-10 02:50:52.126086', 'step': 12742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:52.182273', 'step': 12742, 'epoch': 2} {'type': 'loss', 'content': 0.11395440995693207, 'timestamp': '2025-09-10 02:50:52.184368', 'step': 12743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:52.240253', 'step': 12743, 'epoch': 2} {'type': 'loss', 'content': 0.10025954991579056, 'timestamp': '2025-09-10 02:50:52.246390', 'step': 12744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:52.300496', 'step': 12744, 'epoch': 2} {'type': 'loss', 'content': 0.15618295967578888, 'timestamp': '2025-09-10 02:50:52.302532', 'step': 12745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:52.356813', 'step': 12745, 'epoch': 2} {'type': 'loss', 'content': 0.24377557635307312, 'timestamp': '2025-09-10 02:50:52.359319', 'step': 12746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:52.414024', 'step': 12746, 'epoch': 2} {'type': 'loss', 'content': 0.09843122214078903, 'timestamp': '2025-09-10 02:50:52.416188', 'step': 12747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:52.470903', 'step': 12747, 'epoch': 2} {'type': 'loss', 'content': 0.09849175810813904, 'timestamp': '2025-09-10 02:50:52.477172', 'step': 12748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:52.531596', 'step': 12748, 'epoch': 2} {'type': 'loss', 'content': 0.18110203742980957, 'timestamp': '2025-09-10 02:50:52.534206', 'step': 12749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:52.591535', 'step': 12749, 'epoch': 2} {'type': 'loss', 'content': 0.0979834571480751, 'timestamp': '2025-09-10 02:50:52.593829', 'step': 12750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:52.648920', 'step': 12750, 'epoch': 2} {'type': 'loss', 'content': 0.18992027640342712, 'timestamp': '2025-09-10 02:50:52.651368', 'step': 12751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:52.706415', 'step': 12751, 'epoch': 2} {'type': 'loss', 'content': 0.13533379137516022, 'timestamp': '2025-09-10 02:50:52.712658', 'step': 12752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:52.766830', 'step': 12752, 'epoch': 2} {'type': 'loss', 'content': 0.20326001942157745, 'timestamp': '2025-09-10 02:50:52.768967', 'step': 12753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:52.823374', 'step': 12753, 'epoch': 2} {'type': 'loss', 'content': 0.10100430995225906, 'timestamp': '2025-09-10 02:50:52.825371', 'step': 12754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:52.882155', 'step': 12754, 'epoch': 2} {'type': 'loss', 'content': 0.07577317208051682, 'timestamp': '2025-09-10 02:50:52.884689', 'step': 12755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:52.941084', 'step': 12755, 'epoch': 2} {'type': 'loss', 'content': 0.08271972090005875, 'timestamp': '2025-09-10 02:50:52.948149', 'step': 12756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:53.004971', 'step': 12756, 'epoch': 2} {'type': 'loss', 'content': 0.1422554850578308, 'timestamp': '2025-09-10 02:50:53.007320', 'step': 12757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:53.061726', 'step': 12757, 'epoch': 2} {'type': 'loss', 'content': 0.14879441261291504, 'timestamp': '2025-09-10 02:50:53.064021', 'step': 12758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:53.118885', 'step': 12758, 'epoch': 2} {'type': 'loss', 'content': 0.13418416678905487, 'timestamp': '2025-09-10 02:50:53.121114', 'step': 12759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:53.175819', 'step': 12759, 'epoch': 2} {'type': 'loss', 'content': 0.13560731709003448, 'timestamp': '2025-09-10 02:50:53.181998', 'step': 12760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:53.237020', 'step': 12760, 'epoch': 2} {'type': 'loss', 'content': 0.10477127879858017, 'timestamp': '2025-09-10 02:50:53.239131', 'step': 12761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:53.295173', 'step': 12761, 'epoch': 2} {'type': 'loss', 'content': 0.12550465762615204, 'timestamp': '2025-09-10 02:50:53.297198', 'step': 12762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:53.352294', 'step': 12762, 'epoch': 2} {'type': 'loss', 'content': 0.19013094902038574, 'timestamp': '2025-09-10 02:50:53.354355', 'step': 12763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:53.408927', 'step': 12763, 'epoch': 2} {'type': 'loss', 'content': 0.05092412978410721, 'timestamp': '2025-09-10 02:50:53.415437', 'step': 12764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:53.469241', 'step': 12764, 'epoch': 2} {'type': 'loss', 'content': 0.06254931539297104, 'timestamp': '2025-09-10 02:50:53.472721', 'step': 12765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:53.528112', 'step': 12765, 'epoch': 2} {'type': 'loss', 'content': 0.14084869623184204, 'timestamp': '2025-09-10 02:50:53.530263', 'step': 12766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:53.585348', 'step': 12766, 'epoch': 2} {'type': 'loss', 'content': 0.13834284245967865, 'timestamp': '2025-09-10 02:50:53.587453', 'step': 12767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:53.642907', 'step': 12767, 'epoch': 2} {'type': 'loss', 'content': 0.12158341705799103, 'timestamp': '2025-09-10 02:50:53.649087', 'step': 12768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:53.703014', 'step': 12768, 'epoch': 2} {'type': 'loss', 'content': 0.12311282753944397, 'timestamp': '2025-09-10 02:50:53.705665', 'step': 12769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:53.760683', 'step': 12769, 'epoch': 2} {'type': 'loss', 'content': 0.12502792477607727, 'timestamp': '2025-09-10 02:50:53.762810', 'step': 12770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:53.818620', 'step': 12770, 'epoch': 2} {'type': 'loss', 'content': 0.05600113794207573, 'timestamp': '2025-09-10 02:50:53.820568', 'step': 12771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:53.874947', 'step': 12771, 'epoch': 2} {'type': 'loss', 'content': 0.11683059483766556, 'timestamp': '2025-09-10 02:50:53.880982', 'step': 12772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:53.935608', 'step': 12772, 'epoch': 2} {'type': 'loss', 'content': 0.1545744091272354, 'timestamp': '2025-09-10 02:50:53.937659', 'step': 12773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:53.993026', 'step': 12773, 'epoch': 2} {'type': 'loss', 'content': 0.26160576939582825, 'timestamp': '2025-09-10 02:50:53.995295', 'step': 12774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:54.051186', 'step': 12774, 'epoch': 2} {'type': 'loss', 'content': 0.08597559481859207, 'timestamp': '2025-09-10 02:50:54.053114', 'step': 12775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:54.124515', 'step': 12775, 'epoch': 2} {'type': 'loss', 'content': 0.16318145394325256, 'timestamp': '2025-09-10 02:50:54.130721', 'step': 12776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:54.223239', 'step': 12776, 'epoch': 2} {'type': 'loss', 'content': 0.07974907010793686, 'timestamp': '2025-09-10 02:50:54.225495', 'step': 12777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:54.300170', 'step': 12777, 'epoch': 2} {'type': 'loss', 'content': 0.08117849379777908, 'timestamp': '2025-09-10 02:50:54.302382', 'step': 12778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:54.367652', 'step': 12778, 'epoch': 2} {'type': 'loss', 'content': 0.10334941744804382, 'timestamp': '2025-09-10 02:50:54.369582', 'step': 12779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:54.450082', 'step': 12779, 'epoch': 2} {'type': 'loss', 'content': 0.11089715361595154, 'timestamp': '2025-09-10 02:50:54.456670', 'step': 12780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:54.529797', 'step': 12780, 'epoch': 2} {'type': 'loss', 'content': 0.0890086442232132, 'timestamp': '2025-09-10 02:50:54.532328', 'step': 12781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:54.631465', 'step': 12781, 'epoch': 2} {'type': 'loss', 'content': 0.08417792618274689, 'timestamp': '2025-09-10 02:50:54.633611', 'step': 12782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:54.732999', 'step': 12782, 'epoch': 2} {'type': 'loss', 'content': 0.11708389222621918, 'timestamp': '2025-09-10 02:50:54.734932', 'step': 12783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:54.805558', 'step': 12783, 'epoch': 2} {'type': 'loss', 'content': 0.2073812186717987, 'timestamp': '2025-09-10 02:50:54.811978', 'step': 12784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:54.868797', 'step': 12784, 'epoch': 2} {'type': 'loss', 'content': 0.13781949877738953, 'timestamp': '2025-09-10 02:50:54.872571', 'step': 12785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:54.931166', 'step': 12785, 'epoch': 2} {'type': 'loss', 'content': 0.08375421911478043, 'timestamp': '2025-09-10 02:50:54.933193', 'step': 12786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:54.989728', 'step': 12786, 'epoch': 2} {'type': 'loss', 'content': 0.09131333976984024, 'timestamp': '2025-09-10 02:50:54.991842', 'step': 12787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:55.047501', 'step': 12787, 'epoch': 2} {'type': 'loss', 'content': 0.09647493064403534, 'timestamp': '2025-09-10 02:50:55.053566', 'step': 12788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:55.108303', 'step': 12788, 'epoch': 2} {'type': 'loss', 'content': 0.14770184457302094, 'timestamp': '2025-09-10 02:50:55.110230', 'step': 12789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:55.169631', 'step': 12789, 'epoch': 2} {'type': 'loss', 'content': 0.10397572070360184, 'timestamp': '2025-09-10 02:50:55.172867', 'step': 12790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:55.229869', 'step': 12790, 'epoch': 2} {'type': 'loss', 'content': 0.1419735997915268, 'timestamp': '2025-09-10 02:50:55.231826', 'step': 12791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:55.286825', 'step': 12791, 'epoch': 2} {'type': 'loss', 'content': 0.15748204290866852, 'timestamp': '2025-09-10 02:50:55.292780', 'step': 12792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:55.348578', 'step': 12792, 'epoch': 2} {'type': 'loss', 'content': 0.14863763749599457, 'timestamp': '2025-09-10 02:50:55.350804', 'step': 12793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:55.414830', 'step': 12793, 'epoch': 2} {'type': 'loss', 'content': 0.06252440065145493, 'timestamp': '2025-09-10 02:50:55.416970', 'step': 12794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:55.476062', 'step': 12794, 'epoch': 2} {'type': 'loss', 'content': 0.18840865790843964, 'timestamp': '2025-09-10 02:50:55.478311', 'step': 12795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:55.534479', 'step': 12795, 'epoch': 2} {'type': 'loss', 'content': 0.1475784033536911, 'timestamp': '2025-09-10 02:50:55.540731', 'step': 12796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:55.597267', 'step': 12796, 'epoch': 2} {'type': 'loss', 'content': 0.11009302735328674, 'timestamp': '2025-09-10 02:50:55.599667', 'step': 12797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:55.654620', 'step': 12797, 'epoch': 2} {'type': 'loss', 'content': 0.1516844928264618, 'timestamp': '2025-09-10 02:50:55.656681', 'step': 12798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:55.716193', 'step': 12798, 'epoch': 2} {'type': 'loss', 'content': 0.1452881097793579, 'timestamp': '2025-09-10 02:50:55.718718', 'step': 12799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:55.774361', 'step': 12799, 'epoch': 2} {'type': 'loss', 'content': 0.10062038898468018, 'timestamp': '2025-09-10 02:50:55.780288', 'step': 12800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:55.834077', 'step': 12800, 'epoch': 2} {'type': 'loss', 'content': 0.07330232113599777, 'timestamp': '2025-09-10 02:50:55.836404', 'step': 12801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:55.891201', 'step': 12801, 'epoch': 2} {'type': 'loss', 'content': 0.027307545766234398, 'timestamp': '2025-09-10 02:50:55.893591', 'step': 12802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:55.949421', 'step': 12802, 'epoch': 2} {'type': 'loss', 'content': 0.1484661102294922, 'timestamp': '2025-09-10 02:50:55.951748', 'step': 12803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:56.008165', 'step': 12803, 'epoch': 2} {'type': 'loss', 'content': 0.1452760547399521, 'timestamp': '2025-09-10 02:50:56.014286', 'step': 12804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:56.068712', 'step': 12804, 'epoch': 2} {'type': 'loss', 'content': 0.14522016048431396, 'timestamp': '2025-09-10 02:50:56.070610', 'step': 12805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:56.131495', 'step': 12805, 'epoch': 2} {'type': 'loss', 'content': 0.16359838843345642, 'timestamp': '2025-09-10 02:50:56.133714', 'step': 12806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:56.188570', 'step': 12806, 'epoch': 2} {'type': 'loss', 'content': 0.1678089052438736, 'timestamp': '2025-09-10 02:50:56.190507', 'step': 12807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:56.245057', 'step': 12807, 'epoch': 2} {'type': 'loss', 'content': 0.18737903237342834, 'timestamp': '2025-09-10 02:50:56.251346', 'step': 12808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:56.305530', 'step': 12808, 'epoch': 2} {'type': 'loss', 'content': 0.13662198185920715, 'timestamp': '2025-09-10 02:50:56.307664', 'step': 12809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:56.362661', 'step': 12809, 'epoch': 2} {'type': 'loss', 'content': 0.1781313419342041, 'timestamp': '2025-09-10 02:50:56.364775', 'step': 12810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:56.419683', 'step': 12810, 'epoch': 2} {'type': 'loss', 'content': 0.14470209181308746, 'timestamp': '2025-09-10 02:50:56.421879', 'step': 12811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:56.477242', 'step': 12811, 'epoch': 2} {'type': 'loss', 'content': 0.2028171271085739, 'timestamp': '2025-09-10 02:50:56.483883', 'step': 12812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:56.539531', 'step': 12812, 'epoch': 2} {'type': 'loss', 'content': 0.08445467054843903, 'timestamp': '2025-09-10 02:50:56.542084', 'step': 12813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:56.597133', 'step': 12813, 'epoch': 2} {'type': 'loss', 'content': 0.09196805953979492, 'timestamp': '2025-09-10 02:50:56.599227', 'step': 12814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:56.654423', 'step': 12814, 'epoch': 2} {'type': 'loss', 'content': 0.12690091133117676, 'timestamp': '2025-09-10 02:50:56.656341', 'step': 12815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:56.713151', 'step': 12815, 'epoch': 2} {'type': 'loss', 'content': 0.15464136004447937, 'timestamp': '2025-09-10 02:50:56.719514', 'step': 12816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:56.773828', 'step': 12816, 'epoch': 2} {'type': 'loss', 'content': 0.10648316890001297, 'timestamp': '2025-09-10 02:50:56.776111', 'step': 12817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:56.837321', 'step': 12817, 'epoch': 2} {'type': 'loss', 'content': 0.10823679715394974, 'timestamp': '2025-09-10 02:50:56.840112', 'step': 12818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:56.895145', 'step': 12818, 'epoch': 2} {'type': 'loss', 'content': 0.07816679030656815, 'timestamp': '2025-09-10 02:50:56.903667', 'step': 12819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:56.960183', 'step': 12819, 'epoch': 2} {'type': 'loss', 'content': 0.14050087332725525, 'timestamp': '2025-09-10 02:50:56.966113', 'step': 12820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:57.021115', 'step': 12820, 'epoch': 2} {'type': 'loss', 'content': 0.09982749819755554, 'timestamp': '2025-09-10 02:50:57.024488', 'step': 12821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:57.079258', 'step': 12821, 'epoch': 2} {'type': 'loss', 'content': 0.10485130548477173, 'timestamp': '2025-09-10 02:50:57.081189', 'step': 12822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:57.146595', 'step': 12822, 'epoch': 2} {'type': 'loss', 'content': 0.1372055858373642, 'timestamp': '2025-09-10 02:50:57.148495', 'step': 12823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:57.203584', 'step': 12823, 'epoch': 2} {'type': 'loss', 'content': 0.12547644972801208, 'timestamp': '2025-09-10 02:50:57.213983', 'step': 12824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:57.268116', 'step': 12824, 'epoch': 2} {'type': 'loss', 'content': 0.10064829140901566, 'timestamp': '2025-09-10 02:50:57.270459', 'step': 12825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:57.325653', 'step': 12825, 'epoch': 2} {'type': 'loss', 'content': 0.11164399981498718, 'timestamp': '2025-09-10 02:50:57.327546', 'step': 12826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:57.383151', 'step': 12826, 'epoch': 2} {'type': 'loss', 'content': 0.05663536489009857, 'timestamp': '2025-09-10 02:50:57.385594', 'step': 12827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:57.440220', 'step': 12827, 'epoch': 2} {'type': 'loss', 'content': 0.06317677348852158, 'timestamp': '2025-09-10 02:50:57.446553', 'step': 12828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:57.500176', 'step': 12828, 'epoch': 2} {'type': 'loss', 'content': 0.14661185443401337, 'timestamp': '2025-09-10 02:50:57.502157', 'step': 12829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:57.556857', 'step': 12829, 'epoch': 2} {'type': 'loss', 'content': 0.1069970354437828, 'timestamp': '2025-09-10 02:50:57.559377', 'step': 12830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:50:57.614252', 'step': 12830, 'epoch': 2} {'type': 'loss', 'content': 0.10907268524169922, 'timestamp': '2025-09-10 02:50:57.616639', 'step': 12831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:57.670615', 'step': 12831, 'epoch': 2} {'type': 'loss', 'content': 0.1541110873222351, 'timestamp': '2025-09-10 02:50:57.677211', 'step': 12832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:57.732142', 'step': 12832, 'epoch': 2} {'type': 'loss', 'content': 0.10052043944597244, 'timestamp': '2025-09-10 02:50:57.734107', 'step': 12833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:57.789240', 'step': 12833, 'epoch': 2} {'type': 'loss', 'content': 0.08594492077827454, 'timestamp': '2025-09-10 02:50:57.791110', 'step': 12834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:57.845488', 'step': 12834, 'epoch': 2} {'type': 'loss', 'content': 0.07004429399967194, 'timestamp': '2025-09-10 02:50:57.847486', 'step': 12835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:57.901501', 'step': 12835, 'epoch': 2} {'type': 'loss', 'content': 0.06718844920396805, 'timestamp': '2025-09-10 02:50:57.907680', 'step': 12836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:57.961852', 'step': 12836, 'epoch': 2} {'type': 'loss', 'content': 0.09921278059482574, 'timestamp': '2025-09-10 02:50:57.963944', 'step': 12837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:58.018249', 'step': 12837, 'epoch': 2} {'type': 'loss', 'content': 0.1139267310500145, 'timestamp': '2025-09-10 02:50:58.020181', 'step': 12838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:58.074335', 'step': 12838, 'epoch': 2} {'type': 'loss', 'content': 0.17833314836025238, 'timestamp': '2025-09-10 02:50:58.076506', 'step': 12839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:58.132169', 'step': 12839, 'epoch': 2} {'type': 'loss', 'content': 0.1158914566040039, 'timestamp': '2025-09-10 02:50:58.138118', 'step': 12840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:58.191957', 'step': 12840, 'epoch': 2} {'type': 'loss', 'content': 0.09674115478992462, 'timestamp': '2025-09-10 02:50:58.194379', 'step': 12841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:58.248804', 'step': 12841, 'epoch': 2} {'type': 'loss', 'content': 0.10680284351110458, 'timestamp': '2025-09-10 02:50:58.251218', 'step': 12842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:58.305525', 'step': 12842, 'epoch': 2} {'type': 'loss', 'content': 0.06809789687395096, 'timestamp': '2025-09-10 02:50:58.308057', 'step': 12843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:58.364470', 'step': 12843, 'epoch': 2} {'type': 'loss', 'content': 0.10223045945167542, 'timestamp': '2025-09-10 02:50:58.370815', 'step': 12844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:58.424742', 'step': 12844, 'epoch': 2} {'type': 'loss', 'content': 0.09670708328485489, 'timestamp': '2025-09-10 02:50:58.426962', 'step': 12845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:58.481861', 'step': 12845, 'epoch': 2} {'type': 'loss', 'content': 0.06559663265943527, 'timestamp': '2025-09-10 02:50:58.483997', 'step': 12846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:58.537425', 'step': 12846, 'epoch': 2} {'type': 'loss', 'content': 0.10197091102600098, 'timestamp': '2025-09-10 02:50:58.539447', 'step': 12847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:58.593336', 'step': 12847, 'epoch': 2} {'type': 'loss', 'content': 0.12795908749103546, 'timestamp': '2025-09-10 02:50:58.599381', 'step': 12848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:58.652293', 'step': 12848, 'epoch': 2} {'type': 'loss', 'content': 0.11316734552383423, 'timestamp': '2025-09-10 02:50:58.654109', 'step': 12849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:58.708132', 'step': 12849, 'epoch': 2} {'type': 'loss', 'content': 0.18338310718536377, 'timestamp': '2025-09-10 02:50:58.710083', 'step': 12850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:58.765553', 'step': 12850, 'epoch': 2} {'type': 'loss', 'content': 0.09378794580698013, 'timestamp': '2025-09-10 02:50:58.767730', 'step': 12851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:58.821689', 'step': 12851, 'epoch': 2} {'type': 'loss', 'content': 0.18608413636684418, 'timestamp': '2025-09-10 02:50:58.827897', 'step': 12852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:58.880893', 'step': 12852, 'epoch': 2} {'type': 'loss', 'content': 0.06405822187662125, 'timestamp': '2025-09-10 02:50:58.883061', 'step': 12853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:58.936521', 'step': 12853, 'epoch': 2} {'type': 'loss', 'content': 0.07920365035533905, 'timestamp': '2025-09-10 02:50:58.938590', 'step': 12854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:58.992061', 'step': 12854, 'epoch': 2} {'type': 'loss', 'content': 0.16684620082378387, 'timestamp': '2025-09-10 02:50:58.994442', 'step': 12855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:59.047870', 'step': 12855, 'epoch': 2} {'type': 'loss', 'content': 0.15016154944896698, 'timestamp': '2025-09-10 02:50:59.053706', 'step': 12856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:59.107231', 'step': 12856, 'epoch': 2} {'type': 'loss', 'content': 0.09956727176904678, 'timestamp': '2025-09-10 02:50:59.109579', 'step': 12857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:59.164093', 'step': 12857, 'epoch': 2} {'type': 'loss', 'content': 0.16716380417346954, 'timestamp': '2025-09-10 02:50:59.166593', 'step': 12858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:59.220905', 'step': 12858, 'epoch': 2} {'type': 'loss', 'content': 0.16201373934745789, 'timestamp': '2025-09-10 02:50:59.222999', 'step': 12859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:59.277165', 'step': 12859, 'epoch': 2} {'type': 'loss', 'content': 0.09664006531238556, 'timestamp': '2025-09-10 02:50:59.283059', 'step': 12860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:59.336371', 'step': 12860, 'epoch': 2} {'type': 'loss', 'content': 0.08324851840734482, 'timestamp': '2025-09-10 02:50:59.338966', 'step': 12861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:59.392584', 'step': 12861, 'epoch': 2} {'type': 'loss', 'content': 0.10765406489372253, 'timestamp': '2025-09-10 02:50:59.394440', 'step': 12862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:59.449833', 'step': 12862, 'epoch': 2} {'type': 'loss', 'content': 0.10690523684024811, 'timestamp': '2025-09-10 02:50:59.451963', 'step': 12863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:59.506325', 'step': 12863, 'epoch': 2} {'type': 'loss', 'content': 0.06550901383161545, 'timestamp': '2025-09-10 02:50:59.512062', 'step': 12864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:59.566984', 'step': 12864, 'epoch': 2} {'type': 'loss', 'content': 0.07868903875350952, 'timestamp': '2025-09-10 02:50:59.569334', 'step': 12865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:59.623497', 'step': 12865, 'epoch': 2} {'type': 'loss', 'content': 0.11471409350633621, 'timestamp': '2025-09-10 02:50:59.625531', 'step': 12866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:59.680698', 'step': 12866, 'epoch': 2} {'type': 'loss', 'content': 0.11545468866825104, 'timestamp': '2025-09-10 02:50:59.683073', 'step': 12867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:59.737558', 'step': 12867, 'epoch': 2} {'type': 'loss', 'content': 0.09456147253513336, 'timestamp': '2025-09-10 02:50:59.743447', 'step': 12868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:50:59.797655', 'step': 12868, 'epoch': 2} {'type': 'loss', 'content': 0.06253068149089813, 'timestamp': '2025-09-10 02:50:59.799562', 'step': 12869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:59.853617', 'step': 12869, 'epoch': 2} {'type': 'loss', 'content': 0.09665927290916443, 'timestamp': '2025-09-10 02:50:59.855678', 'step': 12870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:50:59.910551', 'step': 12870, 'epoch': 2} {'type': 'loss', 'content': 0.1207694485783577, 'timestamp': '2025-09-10 02:50:59.912778', 'step': 12871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:59.968099', 'step': 12871, 'epoch': 2} {'type': 'loss', 'content': 0.1659700572490692, 'timestamp': '2025-09-10 02:50:59.974034', 'step': 12872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:00.028424', 'step': 12872, 'epoch': 2} {'type': 'loss', 'content': 0.09885537624359131, 'timestamp': '2025-09-10 02:51:00.030544', 'step': 12873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:00.085775', 'step': 12873, 'epoch': 2} {'type': 'loss', 'content': 0.08144444227218628, 'timestamp': '2025-09-10 02:51:00.087930', 'step': 12874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:00.146468', 'step': 12874, 'epoch': 2} {'type': 'loss', 'content': 0.1567268669605255, 'timestamp': '2025-09-10 02:51:00.148811', 'step': 12875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:00.203923', 'step': 12875, 'epoch': 2} {'type': 'loss', 'content': 0.2185012847185135, 'timestamp': '2025-09-10 02:51:00.210557', 'step': 12876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:00.264890', 'step': 12876, 'epoch': 2} {'type': 'loss', 'content': 0.12433890998363495, 'timestamp': '2025-09-10 02:51:00.267305', 'step': 12877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:00.321782', 'step': 12877, 'epoch': 2} {'type': 'loss', 'content': 0.1551889181137085, 'timestamp': '2025-09-10 02:51:00.323894', 'step': 12878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:00.378870', 'step': 12878, 'epoch': 2} {'type': 'loss', 'content': 0.06920655071735382, 'timestamp': '2025-09-10 02:51:00.380934', 'step': 12879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:00.435805', 'step': 12879, 'epoch': 2} {'type': 'loss', 'content': 0.0693032294511795, 'timestamp': '2025-09-10 02:51:00.443533', 'step': 12880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:00.499960', 'step': 12880, 'epoch': 2} {'type': 'loss', 'content': 0.08466935902833939, 'timestamp': '2025-09-10 02:51:00.502075', 'step': 12881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:00.556215', 'step': 12881, 'epoch': 2} {'type': 'loss', 'content': 0.20270368456840515, 'timestamp': '2025-09-10 02:51:00.558607', 'step': 12882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:00.613210', 'step': 12882, 'epoch': 2} {'type': 'loss', 'content': 0.05559726059436798, 'timestamp': '2025-09-10 02:51:00.615125', 'step': 12883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:00.676496', 'step': 12883, 'epoch': 2} {'type': 'loss', 'content': 0.14814402163028717, 'timestamp': '2025-09-10 02:51:00.682759', 'step': 12884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:00.738499', 'step': 12884, 'epoch': 2} {'type': 'loss', 'content': 0.13395050168037415, 'timestamp': '2025-09-10 02:51:00.740563', 'step': 12885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:00.797994', 'step': 12885, 'epoch': 2} {'type': 'loss', 'content': 0.08170916885137558, 'timestamp': '2025-09-10 02:51:00.800192', 'step': 12886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:00.858306', 'step': 12886, 'epoch': 2} {'type': 'loss', 'content': 0.0744868591427803, 'timestamp': '2025-09-10 02:51:00.860185', 'step': 12887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:00.914567', 'step': 12887, 'epoch': 2} {'type': 'loss', 'content': 0.18663209676742554, 'timestamp': '2025-09-10 02:51:00.920677', 'step': 12888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:00.974274', 'step': 12888, 'epoch': 2} {'type': 'loss', 'content': 0.06330034136772156, 'timestamp': '2025-09-10 02:51:00.976329', 'step': 12889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:01.030823', 'step': 12889, 'epoch': 2} {'type': 'loss', 'content': 0.0615353025496006, 'timestamp': '2025-09-10 02:51:01.033182', 'step': 12890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:01.088176', 'step': 12890, 'epoch': 2} {'type': 'loss', 'content': 0.09414786100387573, 'timestamp': '2025-09-10 02:51:01.090760', 'step': 12891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:01.151699', 'step': 12891, 'epoch': 2} {'type': 'loss', 'content': 0.08545853197574615, 'timestamp': '2025-09-10 02:51:01.158022', 'step': 12892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:01.212457', 'step': 12892, 'epoch': 2} {'type': 'loss', 'content': 0.11796973645687103, 'timestamp': '2025-09-10 02:51:01.214765', 'step': 12893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:01.270596', 'step': 12893, 'epoch': 2} {'type': 'loss', 'content': 0.12192820757627487, 'timestamp': '2025-09-10 02:51:01.272792', 'step': 12894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:01.328978', 'step': 12894, 'epoch': 2} {'type': 'loss', 'content': 0.17119525372982025, 'timestamp': '2025-09-10 02:51:01.330930', 'step': 12895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:01.385528', 'step': 12895, 'epoch': 2} {'type': 'loss', 'content': 0.09599975496530533, 'timestamp': '2025-09-10 02:51:01.391595', 'step': 12896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:01.445261', 'step': 12896, 'epoch': 2} {'type': 'loss', 'content': 0.09984926879405975, 'timestamp': '2025-09-10 02:51:01.447202', 'step': 12897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:01.503069', 'step': 12897, 'epoch': 2} {'type': 'loss', 'content': 0.15344497561454773, 'timestamp': '2025-09-10 02:51:01.506468', 'step': 12898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:01.562235', 'step': 12898, 'epoch': 2} {'type': 'loss', 'content': 0.11499017477035522, 'timestamp': '2025-09-10 02:51:01.564506', 'step': 12899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:01.618589', 'step': 12899, 'epoch': 2} {'type': 'loss', 'content': 0.13853861391544342, 'timestamp': '2025-09-10 02:51:01.625339', 'step': 12900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:01.679695', 'step': 12900, 'epoch': 2} {'type': 'loss', 'content': 0.17036785185337067, 'timestamp': '2025-09-10 02:51:01.681703', 'step': 12901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:01.736871', 'step': 12901, 'epoch': 2} {'type': 'loss', 'content': 0.11313880234956741, 'timestamp': '2025-09-10 02:51:01.738951', 'step': 12902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:01.793490', 'step': 12902, 'epoch': 2} {'type': 'loss', 'content': 0.12498092651367188, 'timestamp': '2025-09-10 02:51:01.795522', 'step': 12903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:01.851677', 'step': 12903, 'epoch': 2} {'type': 'loss', 'content': 0.09063781052827835, 'timestamp': '2025-09-10 02:51:01.857770', 'step': 12904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:01.913639', 'step': 12904, 'epoch': 2} {'type': 'loss', 'content': 0.0938582792878151, 'timestamp': '2025-09-10 02:51:01.915571', 'step': 12905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:01.970144', 'step': 12905, 'epoch': 2} {'type': 'loss', 'content': 0.11378508806228638, 'timestamp': '2025-09-10 02:51:01.972418', 'step': 12906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:02.028339', 'step': 12906, 'epoch': 2} {'type': 'loss', 'content': 0.06984610110521317, 'timestamp': '2025-09-10 02:51:02.030480', 'step': 12907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:02.084540', 'step': 12907, 'epoch': 2} {'type': 'loss', 'content': 0.06775353848934174, 'timestamp': '2025-09-10 02:51:02.090649', 'step': 12908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:02.157300', 'step': 12908, 'epoch': 2} {'type': 'loss', 'content': 0.11962316930294037, 'timestamp': '2025-09-10 02:51:02.159546', 'step': 12909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:02.213511', 'step': 12909, 'epoch': 2} {'type': 'loss', 'content': 0.10973254591226578, 'timestamp': '2025-09-10 02:51:02.215873', 'step': 12910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:02.270765', 'step': 12910, 'epoch': 2} {'type': 'loss', 'content': 0.17647022008895874, 'timestamp': '2025-09-10 02:51:02.272687', 'step': 12911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:02.326633', 'step': 12911, 'epoch': 2} {'type': 'loss', 'content': 0.07542739063501358, 'timestamp': '2025-09-10 02:51:02.332913', 'step': 12912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:02.387863', 'step': 12912, 'epoch': 2} {'type': 'loss', 'content': 0.11164918541908264, 'timestamp': '2025-09-10 02:51:02.390195', 'step': 12913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:02.444601', 'step': 12913, 'epoch': 2} {'type': 'loss', 'content': 0.2074187994003296, 'timestamp': '2025-09-10 02:51:02.446653', 'step': 12914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:02.500656', 'step': 12914, 'epoch': 2} {'type': 'loss', 'content': 0.12268321961164474, 'timestamp': '2025-09-10 02:51:02.502875', 'step': 12915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:02.558430', 'step': 12915, 'epoch': 2} {'type': 'loss', 'content': 0.07055135071277618, 'timestamp': '2025-09-10 02:51:02.564544', 'step': 12916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:02.618623', 'step': 12916, 'epoch': 2} {'type': 'loss', 'content': 0.12070108950138092, 'timestamp': '2025-09-10 02:51:02.620525', 'step': 12917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:02.675307', 'step': 12917, 'epoch': 2} {'type': 'loss', 'content': 0.25505682826042175, 'timestamp': '2025-09-10 02:51:02.677209', 'step': 12918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:02.733664', 'step': 12918, 'epoch': 2} {'type': 'loss', 'content': 0.09220583736896515, 'timestamp': '2025-09-10 02:51:02.735615', 'step': 12919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:02.789858', 'step': 12919, 'epoch': 2} {'type': 'loss', 'content': 0.15037409961223602, 'timestamp': '2025-09-10 02:51:02.795852', 'step': 12920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:02.849940', 'step': 12920, 'epoch': 2} {'type': 'loss', 'content': 0.0816371887922287, 'timestamp': '2025-09-10 02:51:02.851833', 'step': 12921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:02.909324', 'step': 12921, 'epoch': 2} {'type': 'loss', 'content': 0.08405545353889465, 'timestamp': '2025-09-10 02:51:02.911690', 'step': 12922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:02.966673', 'step': 12922, 'epoch': 2} {'type': 'loss', 'content': 0.08584358543157578, 'timestamp': '2025-09-10 02:51:02.968913', 'step': 12923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:03.024476', 'step': 12923, 'epoch': 2} {'type': 'loss', 'content': 0.0576309897005558, 'timestamp': '2025-09-10 02:51:03.030765', 'step': 12924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:03.084945', 'step': 12924, 'epoch': 2} {'type': 'loss', 'content': 0.05436902120709419, 'timestamp': '2025-09-10 02:51:03.087309', 'step': 12925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:03.142791', 'step': 12925, 'epoch': 2} {'type': 'loss', 'content': 0.14315450191497803, 'timestamp': '2025-09-10 02:51:03.144778', 'step': 12926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:03.199312', 'step': 12926, 'epoch': 2} {'type': 'loss', 'content': 0.11078479886054993, 'timestamp': '2025-09-10 02:51:03.201572', 'step': 12927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:03.256506', 'step': 12927, 'epoch': 2} {'type': 'loss', 'content': 0.16383662819862366, 'timestamp': '2025-09-10 02:51:03.262516', 'step': 12928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:03.317690', 'step': 12928, 'epoch': 2} {'type': 'loss', 'content': 0.06149863824248314, 'timestamp': '2025-09-10 02:51:03.319490', 'step': 12929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:03.374942', 'step': 12929, 'epoch': 2} {'type': 'loss', 'content': 0.2099573314189911, 'timestamp': '2025-09-10 02:51:03.377099', 'step': 12930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:03.435913', 'step': 12930, 'epoch': 2} {'type': 'loss', 'content': 0.10579624772071838, 'timestamp': '2025-09-10 02:51:03.438085', 'step': 12931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:03.494058', 'step': 12931, 'epoch': 2} {'type': 'loss', 'content': 0.06774107366800308, 'timestamp': '2025-09-10 02:51:03.500118', 'step': 12932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:03.554332', 'step': 12932, 'epoch': 2} {'type': 'loss', 'content': 0.10862166434526443, 'timestamp': '2025-09-10 02:51:03.556295', 'step': 12933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:03.610966', 'step': 12933, 'epoch': 2} {'type': 'loss', 'content': 0.15339909493923187, 'timestamp': '2025-09-10 02:51:03.613315', 'step': 12934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:03.667952', 'step': 12934, 'epoch': 2} {'type': 'loss', 'content': 0.07540830224752426, 'timestamp': '2025-09-10 02:51:03.669889', 'step': 12935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:03.723950', 'step': 12935, 'epoch': 2} {'type': 'loss', 'content': 0.07028525322675705, 'timestamp': '2025-09-10 02:51:03.730120', 'step': 12936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:03.782872', 'step': 12936, 'epoch': 2} {'type': 'loss', 'content': 0.21522007882595062, 'timestamp': '2025-09-10 02:51:03.785042', 'step': 12937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:03.837776', 'step': 12937, 'epoch': 2} {'type': 'loss', 'content': 0.15997740626335144, 'timestamp': '2025-09-10 02:51:03.840072', 'step': 12938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:03.893959', 'step': 12938, 'epoch': 2} {'type': 'loss', 'content': 0.12378180027008057, 'timestamp': '2025-09-10 02:51:03.896065', 'step': 12939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:03.949510', 'step': 12939, 'epoch': 2} {'type': 'loss', 'content': 0.11179016530513763, 'timestamp': '2025-09-10 02:51:03.955656', 'step': 12940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:04.008897', 'step': 12940, 'epoch': 2} {'type': 'loss', 'content': 0.18189898133277893, 'timestamp': '2025-09-10 02:51:04.011555', 'step': 12941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:04.065582', 'step': 12941, 'epoch': 2} {'type': 'loss', 'content': 0.05649445578455925, 'timestamp': '2025-09-10 02:51:04.067646', 'step': 12942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:04.121447', 'step': 12942, 'epoch': 2} {'type': 'loss', 'content': 0.08740049600601196, 'timestamp': '2025-09-10 02:51:04.123927', 'step': 12943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:04.178745', 'step': 12943, 'epoch': 2} {'type': 'loss', 'content': 0.15538166463375092, 'timestamp': '2025-09-10 02:51:04.184670', 'step': 12944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:04.238565', 'step': 12944, 'epoch': 2} {'type': 'loss', 'content': 0.10026246309280396, 'timestamp': '2025-09-10 02:51:04.240709', 'step': 12945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:04.294776', 'step': 12945, 'epoch': 2} {'type': 'loss', 'content': 0.20392350852489471, 'timestamp': '2025-09-10 02:51:04.297031', 'step': 12946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:04.352023', 'step': 12946, 'epoch': 2} {'type': 'loss', 'content': 0.09386959671974182, 'timestamp': '2025-09-10 02:51:04.353952', 'step': 12947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:04.408718', 'step': 12947, 'epoch': 2} {'type': 'loss', 'content': 0.13546983897686005, 'timestamp': '2025-09-10 02:51:04.415235', 'step': 12948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:04.469390', 'step': 12948, 'epoch': 2} {'type': 'loss', 'content': 0.06544854491949081, 'timestamp': '2025-09-10 02:51:04.471762', 'step': 12949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:04.528073', 'step': 12949, 'epoch': 2} {'type': 'loss', 'content': 0.13683179020881653, 'timestamp': '2025-09-10 02:51:04.530187', 'step': 12950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:04.585499', 'step': 12950, 'epoch': 2} {'type': 'loss', 'content': 0.046773869544267654, 'timestamp': '2025-09-10 02:51:04.587468', 'step': 12951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:04.643194', 'step': 12951, 'epoch': 2} {'type': 'loss', 'content': 0.07105973362922668, 'timestamp': '2025-09-10 02:51:04.649300', 'step': 12952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:04.702922', 'step': 12952, 'epoch': 2} {'type': 'loss', 'content': 0.15655072033405304, 'timestamp': '2025-09-10 02:51:04.705436', 'step': 12953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:04.759843', 'step': 12953, 'epoch': 2} {'type': 'loss', 'content': 0.08987990766763687, 'timestamp': '2025-09-10 02:51:04.761975', 'step': 12954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:04.816475', 'step': 12954, 'epoch': 2} {'type': 'loss', 'content': 0.1360074132680893, 'timestamp': '2025-09-10 02:51:04.818448', 'step': 12955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:04.873240', 'step': 12955, 'epoch': 2} {'type': 'loss', 'content': 0.16875788569450378, 'timestamp': '2025-09-10 02:51:04.879745', 'step': 12956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:04.933859', 'step': 12956, 'epoch': 2} {'type': 'loss', 'content': 0.12815888226032257, 'timestamp': '2025-09-10 02:51:04.936201', 'step': 12957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:04.989964', 'step': 12957, 'epoch': 2} {'type': 'loss', 'content': 0.06858164817094803, 'timestamp': '2025-09-10 02:51:04.992070', 'step': 12958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:05.048766', 'step': 12958, 'epoch': 2} {'type': 'loss', 'content': 0.12062903493642807, 'timestamp': '2025-09-10 02:51:05.050919', 'step': 12959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:05.105695', 'step': 12959, 'epoch': 2} {'type': 'loss', 'content': 0.14909015595912933, 'timestamp': '2025-09-10 02:51:05.111892', 'step': 12960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:05.166255', 'step': 12960, 'epoch': 2} {'type': 'loss', 'content': 0.11888644099235535, 'timestamp': '2025-09-10 02:51:05.168034', 'step': 12961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:05.221998', 'step': 12961, 'epoch': 2} {'type': 'loss', 'content': 0.1996978223323822, 'timestamp': '2025-09-10 02:51:05.223952', 'step': 12962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:05.278532', 'step': 12962, 'epoch': 2} {'type': 'loss', 'content': 0.01595332846045494, 'timestamp': '2025-09-10 02:51:05.280801', 'step': 12963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:05.334793', 'step': 12963, 'epoch': 2} {'type': 'loss', 'content': 0.0972277820110321, 'timestamp': '2025-09-10 02:51:05.340889', 'step': 12964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:05.394819', 'step': 12964, 'epoch': 2} {'type': 'loss', 'content': 0.09720683842897415, 'timestamp': '2025-09-10 02:51:05.396743', 'step': 12965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:05.451503', 'step': 12965, 'epoch': 2} {'type': 'loss', 'content': 0.15379932522773743, 'timestamp': '2025-09-10 02:51:05.454614', 'step': 12966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:05.513764', 'step': 12966, 'epoch': 2} {'type': 'loss', 'content': 0.1706455647945404, 'timestamp': '2025-09-10 02:51:05.515860', 'step': 12967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:05.578254', 'step': 12967, 'epoch': 2} {'type': 'loss', 'content': 0.15162400901317596, 'timestamp': '2025-09-10 02:51:05.584664', 'step': 12968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:05.638465', 'step': 12968, 'epoch': 2} {'type': 'loss', 'content': 0.14126168191432953, 'timestamp': '2025-09-10 02:51:05.646421', 'step': 12969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:05.705420', 'step': 12969, 'epoch': 2} {'type': 'loss', 'content': 0.1332697719335556, 'timestamp': '2025-09-10 02:51:05.707682', 'step': 12970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:05.761814', 'step': 12970, 'epoch': 2} {'type': 'loss', 'content': 0.13075582683086395, 'timestamp': '2025-09-10 02:51:05.764124', 'step': 12971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:05.817406', 'step': 12971, 'epoch': 2} {'type': 'loss', 'content': 0.08792430907487869, 'timestamp': '2025-09-10 02:51:05.824292', 'step': 12972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:05.879103', 'step': 12972, 'epoch': 2} {'type': 'loss', 'content': 0.15712310373783112, 'timestamp': '2025-09-10 02:51:05.881215', 'step': 12973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:05.935224', 'step': 12973, 'epoch': 2} {'type': 'loss', 'content': 0.07264182716608047, 'timestamp': '2025-09-10 02:51:05.937490', 'step': 12974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:05.991418', 'step': 12974, 'epoch': 2} {'type': 'loss', 'content': 0.08691421896219254, 'timestamp': '2025-09-10 02:51:05.998563', 'step': 12975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:06.053687', 'step': 12975, 'epoch': 2} {'type': 'loss', 'content': 0.05518313869833946, 'timestamp': '2025-09-10 02:51:06.059829', 'step': 12976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:06.114772', 'step': 12976, 'epoch': 2} {'type': 'loss', 'content': 0.11303434520959854, 'timestamp': '2025-09-10 02:51:06.117751', 'step': 12977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:06.173310', 'step': 12977, 'epoch': 2} {'type': 'loss', 'content': 0.14999544620513916, 'timestamp': '2025-09-10 02:51:06.175469', 'step': 12978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:06.230058', 'step': 12978, 'epoch': 2} {'type': 'loss', 'content': 0.1178533136844635, 'timestamp': '2025-09-10 02:51:06.232465', 'step': 12979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:06.288883', 'step': 12979, 'epoch': 2} {'type': 'loss', 'content': 0.0772542729973793, 'timestamp': '2025-09-10 02:51:06.295085', 'step': 12980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:06.351806', 'step': 12980, 'epoch': 2} {'type': 'loss', 'content': 0.11704987287521362, 'timestamp': '2025-09-10 02:51:06.354351', 'step': 12981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:06.408884', 'step': 12981, 'epoch': 2} {'type': 'loss', 'content': 0.06925446540117264, 'timestamp': '2025-09-10 02:51:06.411201', 'step': 12982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:06.468297', 'step': 12982, 'epoch': 2} {'type': 'loss', 'content': 0.10696747153997421, 'timestamp': '2025-09-10 02:51:06.474457', 'step': 12983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:06.528470', 'step': 12983, 'epoch': 2} {'type': 'loss', 'content': 0.10046666860580444, 'timestamp': '2025-09-10 02:51:06.534495', 'step': 12984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:06.589461', 'step': 12984, 'epoch': 2} {'type': 'loss', 'content': 0.06316279619932175, 'timestamp': '2025-09-10 02:51:06.591675', 'step': 12985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:06.645989', 'step': 12985, 'epoch': 2} {'type': 'loss', 'content': 0.1769968569278717, 'timestamp': '2025-09-10 02:51:06.652577', 'step': 12986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:06.707569', 'step': 12986, 'epoch': 2} {'type': 'loss', 'content': 0.09384637326002121, 'timestamp': '2025-09-10 02:51:06.710360', 'step': 12987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:06.764108', 'step': 12987, 'epoch': 2} {'type': 'loss', 'content': 0.10754808038473129, 'timestamp': '2025-09-10 02:51:06.770343', 'step': 12988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:06.825482', 'step': 12988, 'epoch': 2} {'type': 'loss', 'content': 0.06306340545415878, 'timestamp': '2025-09-10 02:51:06.827802', 'step': 12989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:06.880703', 'step': 12989, 'epoch': 2} {'type': 'loss', 'content': 0.07911865413188934, 'timestamp': '2025-09-10 02:51:06.882830', 'step': 12990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:06.936545', 'step': 12990, 'epoch': 2} {'type': 'loss', 'content': 0.12571193277835846, 'timestamp': '2025-09-10 02:51:06.938802', 'step': 12991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:06.994421', 'step': 12991, 'epoch': 2} {'type': 'loss', 'content': 0.1862301379442215, 'timestamp': '2025-09-10 02:51:07.000397', 'step': 12992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:07.053187', 'step': 12992, 'epoch': 2} {'type': 'loss', 'content': 0.14569354057312012, 'timestamp': '2025-09-10 02:51:07.054967', 'step': 12993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:07.108461', 'step': 12993, 'epoch': 2} {'type': 'loss', 'content': 0.21926215291023254, 'timestamp': '2025-09-10 02:51:07.110359', 'step': 12994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:07.165461', 'step': 12994, 'epoch': 2} {'type': 'loss', 'content': 0.14861220121383667, 'timestamp': '2025-09-10 02:51:07.171414', 'step': 12995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:07.235079', 'step': 12995, 'epoch': 2} {'type': 'loss', 'content': 0.12433642894029617, 'timestamp': '2025-09-10 02:51:07.241215', 'step': 12996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:07.299639', 'step': 12996, 'epoch': 2} {'type': 'loss', 'content': 0.10274375975131989, 'timestamp': '2025-09-10 02:51:07.302074', 'step': 12997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:07.355514', 'step': 12997, 'epoch': 2} {'type': 'loss', 'content': 0.13720184564590454, 'timestamp': '2025-09-10 02:51:07.357917', 'step': 12998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:07.416190', 'step': 12998, 'epoch': 2} {'type': 'loss', 'content': 0.20819105207920074, 'timestamp': '2025-09-10 02:51:07.418423', 'step': 12999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:07.473319', 'step': 12999, 'epoch': 2} {'type': 'loss', 'content': 0.10155962407588959, 'timestamp': '2025-09-10 02:51:07.479958', 'step': 13000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 13000', 'timestamp': '2025-09-10 02:51:07.895693', 'step': 13000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:07.955064', 'step': 13000, 'epoch': 2} {'type': 'loss', 'content': 0.07218597829341888, 'timestamp': '2025-09-10 02:51:07.957388', 'step': 13001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:08.014477', 'step': 13001, 'epoch': 2} {'type': 'loss', 'content': 0.11349361389875412, 'timestamp': '2025-09-10 02:51:08.016814', 'step': 13002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:08.073883', 'step': 13002, 'epoch': 2} {'type': 'loss', 'content': 0.10585697740316391, 'timestamp': '2025-09-10 02:51:08.076230', 'step': 13003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:08.134013', 'step': 13003, 'epoch': 2} {'type': 'loss', 'content': 0.1206618919968605, 'timestamp': '2025-09-10 02:51:08.140212', 'step': 13004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:08.193851', 'step': 13004, 'epoch': 2} {'type': 'loss', 'content': 0.09270057082176208, 'timestamp': '2025-09-10 02:51:08.195934', 'step': 13005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:08.250453', 'step': 13005, 'epoch': 2} {'type': 'loss', 'content': 0.15393272042274475, 'timestamp': '2025-09-10 02:51:08.252520', 'step': 13006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:08.306207', 'step': 13006, 'epoch': 2} {'type': 'loss', 'content': 0.08248261362314224, 'timestamp': '2025-09-10 02:51:08.308297', 'step': 13007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:08.361664', 'step': 13007, 'epoch': 2} {'type': 'loss', 'content': 0.167677104473114, 'timestamp': '2025-09-10 02:51:08.367491', 'step': 13008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:08.420180', 'step': 13008, 'epoch': 2} {'type': 'loss', 'content': 0.14351443946361542, 'timestamp': '2025-09-10 02:51:08.422196', 'step': 13009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:08.475524', 'step': 13009, 'epoch': 2} {'type': 'loss', 'content': 0.10308901220560074, 'timestamp': '2025-09-10 02:51:08.477434', 'step': 13010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:08.530709', 'step': 13010, 'epoch': 2} {'type': 'loss', 'content': 0.24937118589878082, 'timestamp': '2025-09-10 02:51:08.532457', 'step': 13011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:08.585698', 'step': 13011, 'epoch': 2} {'type': 'loss', 'content': 0.20415277779102325, 'timestamp': '2025-09-10 02:51:08.591620', 'step': 13012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:08.644516', 'step': 13012, 'epoch': 2} {'type': 'loss', 'content': 0.19723983108997345, 'timestamp': '2025-09-10 02:51:08.646531', 'step': 13013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:08.699577', 'step': 13013, 'epoch': 2} {'type': 'loss', 'content': 0.12857431173324585, 'timestamp': '2025-09-10 02:51:08.701958', 'step': 13014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:08.757632', 'step': 13014, 'epoch': 2} {'type': 'loss', 'content': 0.08100069314241409, 'timestamp': '2025-09-10 02:51:08.759845', 'step': 13015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:08.813743', 'step': 13015, 'epoch': 2} {'type': 'loss', 'content': 0.10584597289562225, 'timestamp': '2025-09-10 02:51:08.819607', 'step': 13016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:08.872337', 'step': 13016, 'epoch': 2} {'type': 'loss', 'content': 0.14233849942684174, 'timestamp': '2025-09-10 02:51:08.874381', 'step': 13017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:08.927659', 'step': 13017, 'epoch': 2} {'type': 'loss', 'content': 0.12243802845478058, 'timestamp': '2025-09-10 02:51:08.930109', 'step': 13018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:08.983761', 'step': 13018, 'epoch': 2} {'type': 'loss', 'content': 0.11637192964553833, 'timestamp': '2025-09-10 02:51:08.985821', 'step': 13019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:09.039171', 'step': 13019, 'epoch': 2} {'type': 'loss', 'content': 0.06195438653230667, 'timestamp': '2025-09-10 02:51:09.044732', 'step': 13020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:09.098211', 'step': 13020, 'epoch': 2} {'type': 'loss', 'content': 0.09975407272577286, 'timestamp': '2025-09-10 02:51:09.100030', 'step': 13021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:09.154033', 'step': 13021, 'epoch': 2} {'type': 'loss', 'content': 0.08138462156057358, 'timestamp': '2025-09-10 02:51:09.155787', 'step': 13022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:09.209032', 'step': 13022, 'epoch': 2} {'type': 'loss', 'content': 0.15060803294181824, 'timestamp': '2025-09-10 02:51:09.210806', 'step': 13023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:09.263514', 'step': 13023, 'epoch': 2} {'type': 'loss', 'content': 0.10874317586421967, 'timestamp': '2025-09-10 02:51:09.269428', 'step': 13024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:09.323047', 'step': 13024, 'epoch': 2} {'type': 'loss', 'content': 0.12369750440120697, 'timestamp': '2025-09-10 02:51:09.325198', 'step': 13025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:09.378349', 'step': 13025, 'epoch': 2} {'type': 'loss', 'content': 0.28115707635879517, 'timestamp': '2025-09-10 02:51:09.380438', 'step': 13026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:09.433679', 'step': 13026, 'epoch': 2} {'type': 'loss', 'content': 0.0997425839304924, 'timestamp': '2025-09-10 02:51:09.436036', 'step': 13027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:09.490221', 'step': 13027, 'epoch': 2} {'type': 'loss', 'content': 0.10732275247573853, 'timestamp': '2025-09-10 02:51:09.496341', 'step': 13028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:09.549519', 'step': 13028, 'epoch': 2} {'type': 'loss', 'content': 0.16447095572948456, 'timestamp': '2025-09-10 02:51:09.551355', 'step': 13029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:09.606022', 'step': 13029, 'epoch': 2} {'type': 'loss', 'content': 0.12489450722932816, 'timestamp': '2025-09-10 02:51:09.608245', 'step': 13030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:09.662116', 'step': 13030, 'epoch': 2} {'type': 'loss', 'content': 0.0976831242442131, 'timestamp': '2025-09-10 02:51:09.664539', 'step': 13031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:09.719083', 'step': 13031, 'epoch': 2} {'type': 'loss', 'content': 0.08868305385112762, 'timestamp': '2025-09-10 02:51:09.725313', 'step': 13032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:09.778713', 'step': 13032, 'epoch': 2} {'type': 'loss', 'content': 0.03659224882721901, 'timestamp': '2025-09-10 02:51:09.780743', 'step': 13033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:09.836181', 'step': 13033, 'epoch': 2} {'type': 'loss', 'content': 0.10171107202768326, 'timestamp': '2025-09-10 02:51:09.838433', 'step': 13034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:09.893524', 'step': 13034, 'epoch': 2} {'type': 'loss', 'content': 0.08305270224809647, 'timestamp': '2025-09-10 02:51:09.895561', 'step': 13035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:09.950900', 'step': 13035, 'epoch': 2} {'type': 'loss', 'content': 0.05166463553905487, 'timestamp': '2025-09-10 02:51:09.957071', 'step': 13036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:10.010789', 'step': 13036, 'epoch': 2} {'type': 'loss', 'content': 0.13006938993930817, 'timestamp': '2025-09-10 02:51:10.012626', 'step': 13037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:10.065641', 'step': 13037, 'epoch': 2} {'type': 'loss', 'content': 0.09128045290708542, 'timestamp': '2025-09-10 02:51:10.067487', 'step': 13038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:10.120562', 'step': 13038, 'epoch': 2} {'type': 'loss', 'content': 0.12324841320514679, 'timestamp': '2025-09-10 02:51:10.122403', 'step': 13039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:10.178144', 'step': 13039, 'epoch': 2} {'type': 'loss', 'content': 0.17120689153671265, 'timestamp': '2025-09-10 02:51:10.183799', 'step': 13040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:10.236581', 'step': 13040, 'epoch': 2} {'type': 'loss', 'content': 0.08208394795656204, 'timestamp': '2025-09-10 02:51:10.238555', 'step': 13041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:10.291035', 'step': 13041, 'epoch': 2} {'type': 'loss', 'content': 0.07531296461820602, 'timestamp': '2025-09-10 02:51:10.293002', 'step': 13042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:10.345906', 'step': 13042, 'epoch': 2} {'type': 'loss', 'content': 0.08276797831058502, 'timestamp': '2025-09-10 02:51:10.347934', 'step': 13043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:10.400464', 'step': 13043, 'epoch': 2} {'type': 'loss', 'content': 0.11980921775102615, 'timestamp': '2025-09-10 02:51:10.406231', 'step': 13044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:10.458690', 'step': 13044, 'epoch': 2} {'type': 'loss', 'content': 0.197059765458107, 'timestamp': '2025-09-10 02:51:10.463118', 'step': 13045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:10.517177', 'step': 13045, 'epoch': 2} {'type': 'loss', 'content': 0.030091775581240654, 'timestamp': '2025-09-10 02:51:10.518919', 'step': 13046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:10.572979', 'step': 13046, 'epoch': 2} {'type': 'loss', 'content': 0.06715793162584305, 'timestamp': '2025-09-10 02:51:10.574930', 'step': 13047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:10.629220', 'step': 13047, 'epoch': 2} {'type': 'loss', 'content': 0.06855745613574982, 'timestamp': '2025-09-10 02:51:10.635326', 'step': 13048, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:51:23.740386', 'step': 13048, 'epoch': 2} {'type': 'pplx', 'content': 14188.120357457874, 'timestamp': '2025-09-10 02:51:23.743440', 'step': 13048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:23.797966', 'step': 13048, 'epoch': 2} {'type': 'loss', 'content': 0.11813937872648239, 'timestamp': '2025-09-10 02:51:23.799941', 'step': 13049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:23.854996', 'step': 13049, 'epoch': 2} {'type': 'loss', 'content': 0.08685959875583649, 'timestamp': '2025-09-10 02:51:23.857104', 'step': 13050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:23.913675', 'step': 13050, 'epoch': 2} {'type': 'loss', 'content': 0.13475029170513153, 'timestamp': '2025-09-10 02:51:23.915816', 'step': 13051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:23.972940', 'step': 13051, 'epoch': 2} {'type': 'loss', 'content': 0.06364753842353821, 'timestamp': '2025-09-10 02:51:23.979081', 'step': 13052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:24.034858', 'step': 13052, 'epoch': 2} {'type': 'loss', 'content': 0.11158616840839386, 'timestamp': '2025-09-10 02:51:24.037134', 'step': 13053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:24.092306', 'step': 13053, 'epoch': 2} {'type': 'loss', 'content': 0.11187184602022171, 'timestamp': '2025-09-10 02:51:24.094406', 'step': 13054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:24.150895', 'step': 13054, 'epoch': 2} {'type': 'loss', 'content': 0.13784508407115936, 'timestamp': '2025-09-10 02:51:24.153127', 'step': 13055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:24.206699', 'step': 13055, 'epoch': 2} {'type': 'loss', 'content': 0.11387912184000015, 'timestamp': '2025-09-10 02:51:24.213153', 'step': 13056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:24.265709', 'step': 13056, 'epoch': 2} {'type': 'loss', 'content': 0.27594518661499023, 'timestamp': '2025-09-10 02:51:24.267560', 'step': 13057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:24.320906', 'step': 13057, 'epoch': 2} {'type': 'loss', 'content': 0.09630293399095535, 'timestamp': '2025-09-10 02:51:24.322941', 'step': 13058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:24.376632', 'step': 13058, 'epoch': 2} {'type': 'loss', 'content': 0.10883476585149765, 'timestamp': '2025-09-10 02:51:24.378714', 'step': 13059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:24.432113', 'step': 13059, 'epoch': 2} {'type': 'loss', 'content': 0.11651702225208282, 'timestamp': '2025-09-10 02:51:24.437959', 'step': 13060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:24.490880', 'step': 13060, 'epoch': 2} {'type': 'loss', 'content': 0.16491658985614777, 'timestamp': '2025-09-10 02:51:24.493125', 'step': 13061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:24.546091', 'step': 13061, 'epoch': 2} {'type': 'loss', 'content': 0.13149353861808777, 'timestamp': '2025-09-10 02:51:24.548219', 'step': 13062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:24.605792', 'step': 13062, 'epoch': 2} {'type': 'loss', 'content': 0.16304725408554077, 'timestamp': '2025-09-10 02:51:24.607882', 'step': 13063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:24.665475', 'step': 13063, 'epoch': 2} {'type': 'loss', 'content': 0.08810614794492722, 'timestamp': '2025-09-10 02:51:24.671663', 'step': 13064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:24.725954', 'step': 13064, 'epoch': 2} {'type': 'loss', 'content': 0.11295764148235321, 'timestamp': '2025-09-10 02:51:24.728234', 'step': 13065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:24.783073', 'step': 13065, 'epoch': 2} {'type': 'loss', 'content': 0.055448777973651886, 'timestamp': '2025-09-10 02:51:24.785132', 'step': 13066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:24.838825', 'step': 13066, 'epoch': 2} {'type': 'loss', 'content': 0.13084055483341217, 'timestamp': '2025-09-10 02:51:24.840876', 'step': 13067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:24.895620', 'step': 13067, 'epoch': 2} {'type': 'loss', 'content': 0.1595880389213562, 'timestamp': '2025-09-10 02:51:24.901680', 'step': 13068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:24.955410', 'step': 13068, 'epoch': 2} {'type': 'loss', 'content': 0.07681649178266525, 'timestamp': '2025-09-10 02:51:24.957507', 'step': 13069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:25.011393', 'step': 13069, 'epoch': 2} {'type': 'loss', 'content': 0.08630615472793579, 'timestamp': '2025-09-10 02:51:25.013556', 'step': 13070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:25.067081', 'step': 13070, 'epoch': 2} {'type': 'loss', 'content': 0.07496847957372665, 'timestamp': '2025-09-10 02:51:25.069369', 'step': 13071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:25.122658', 'step': 13071, 'epoch': 2} {'type': 'loss', 'content': 0.08527158945798874, 'timestamp': '2025-09-10 02:51:25.128934', 'step': 13072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:25.182892', 'step': 13072, 'epoch': 2} {'type': 'loss', 'content': 0.1453002393245697, 'timestamp': '2025-09-10 02:51:25.184957', 'step': 13073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:25.238757', 'step': 13073, 'epoch': 2} {'type': 'loss', 'content': 0.1341465711593628, 'timestamp': '2025-09-10 02:51:25.240958', 'step': 13074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:25.295935', 'step': 13074, 'epoch': 2} {'type': 'loss', 'content': 0.1075625941157341, 'timestamp': '2025-09-10 02:51:25.297943', 'step': 13075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:25.350311', 'step': 13075, 'epoch': 2} {'type': 'loss', 'content': 0.06166420504450798, 'timestamp': '2025-09-10 02:51:25.356427', 'step': 13076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:25.411489', 'step': 13076, 'epoch': 2} {'type': 'loss', 'content': 0.09722781181335449, 'timestamp': '2025-09-10 02:51:25.413772', 'step': 13077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:25.468097', 'step': 13077, 'epoch': 2} {'type': 'loss', 'content': 0.15435534715652466, 'timestamp': '2025-09-10 02:51:25.470539', 'step': 13078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:25.524162', 'step': 13078, 'epoch': 2} {'type': 'loss', 'content': 0.08571504801511765, 'timestamp': '2025-09-10 02:51:25.526466', 'step': 13079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:25.581137', 'step': 13079, 'epoch': 2} {'type': 'loss', 'content': 0.16366364061832428, 'timestamp': '2025-09-10 02:51:25.587651', 'step': 13080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:25.642523', 'step': 13080, 'epoch': 2} {'type': 'loss', 'content': 0.19687461853027344, 'timestamp': '2025-09-10 02:51:25.644791', 'step': 13081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:25.701439', 'step': 13081, 'epoch': 2} {'type': 'loss', 'content': 0.1349734216928482, 'timestamp': '2025-09-10 02:51:25.704495', 'step': 13082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:25.759587', 'step': 13082, 'epoch': 2} {'type': 'loss', 'content': 0.09072567522525787, 'timestamp': '2025-09-10 02:51:25.762000', 'step': 13083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:25.817693', 'step': 13083, 'epoch': 2} {'type': 'loss', 'content': 0.14763815701007843, 'timestamp': '2025-09-10 02:51:25.823956', 'step': 13084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:25.877510', 'step': 13084, 'epoch': 2} {'type': 'loss', 'content': 0.1841152310371399, 'timestamp': '2025-09-10 02:51:25.880000', 'step': 13085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:25.932808', 'step': 13085, 'epoch': 2} {'type': 'loss', 'content': 0.1989569216966629, 'timestamp': '2025-09-10 02:51:25.934890', 'step': 13086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:25.988632', 'step': 13086, 'epoch': 2} {'type': 'loss', 'content': 0.22745658457279205, 'timestamp': '2025-09-10 02:51:25.990995', 'step': 13087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:26.045851', 'step': 13087, 'epoch': 2} {'type': 'loss', 'content': 0.12006128579378128, 'timestamp': '2025-09-10 02:51:26.052008', 'step': 13088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:26.106899', 'step': 13088, 'epoch': 2} {'type': 'loss', 'content': 0.11890063434839249, 'timestamp': '2025-09-10 02:51:26.109034', 'step': 13089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:26.164175', 'step': 13089, 'epoch': 2} {'type': 'loss', 'content': 0.1547534018754959, 'timestamp': '2025-09-10 02:51:26.166332', 'step': 13090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:26.228507', 'step': 13090, 'epoch': 2} {'type': 'loss', 'content': 0.1306656152009964, 'timestamp': '2025-09-10 02:51:26.230834', 'step': 13091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:26.286150', 'step': 13091, 'epoch': 2} {'type': 'loss', 'content': 0.1387643963098526, 'timestamp': '2025-09-10 02:51:26.292778', 'step': 13092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:26.348051', 'step': 13092, 'epoch': 2} {'type': 'loss', 'content': 0.1508186310529709, 'timestamp': '2025-09-10 02:51:26.350615', 'step': 13093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:26.408226', 'step': 13093, 'epoch': 2} {'type': 'loss', 'content': 0.10331452637910843, 'timestamp': '2025-09-10 02:51:26.410355', 'step': 13094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:26.465090', 'step': 13094, 'epoch': 2} {'type': 'loss', 'content': 0.1637450009584427, 'timestamp': '2025-09-10 02:51:26.467276', 'step': 13095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:26.522019', 'step': 13095, 'epoch': 2} {'type': 'loss', 'content': 0.14778447151184082, 'timestamp': '2025-09-10 02:51:26.528392', 'step': 13096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:26.581724', 'step': 13096, 'epoch': 2} {'type': 'loss', 'content': 0.10781893879175186, 'timestamp': '2025-09-10 02:51:26.584121', 'step': 13097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:26.638778', 'step': 13097, 'epoch': 2} {'type': 'loss', 'content': 0.11400028318166733, 'timestamp': '2025-09-10 02:51:26.641069', 'step': 13098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:26.695350', 'step': 13098, 'epoch': 2} {'type': 'loss', 'content': 0.16019193828105927, 'timestamp': '2025-09-10 02:51:26.697477', 'step': 13099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:26.752600', 'step': 13099, 'epoch': 2} {'type': 'loss', 'content': 0.10199981927871704, 'timestamp': '2025-09-10 02:51:26.759089', 'step': 13100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:26.814890', 'step': 13100, 'epoch': 2} {'type': 'loss', 'content': 0.10678922384977341, 'timestamp': '2025-09-10 02:51:26.817442', 'step': 13101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:26.871438', 'step': 13101, 'epoch': 2} {'type': 'loss', 'content': 0.22756892442703247, 'timestamp': '2025-09-10 02:51:26.873588', 'step': 13102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:26.928252', 'step': 13102, 'epoch': 2} {'type': 'loss', 'content': 0.1663370132446289, 'timestamp': '2025-09-10 02:51:26.930339', 'step': 13103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:26.983553', 'step': 13103, 'epoch': 2} {'type': 'loss', 'content': 0.17326194047927856, 'timestamp': '2025-09-10 02:51:26.989838', 'step': 13104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:27.043299', 'step': 13104, 'epoch': 2} {'type': 'loss', 'content': 0.11493725329637527, 'timestamp': '2025-09-10 02:51:27.045604', 'step': 13105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:27.099901', 'step': 13105, 'epoch': 2} {'type': 'loss', 'content': 0.1465408205986023, 'timestamp': '2025-09-10 02:51:27.102113', 'step': 13106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:27.155678', 'step': 13106, 'epoch': 2} {'type': 'loss', 'content': 0.06793492287397385, 'timestamp': '2025-09-10 02:51:27.157937', 'step': 13107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:27.212464', 'step': 13107, 'epoch': 2} {'type': 'loss', 'content': 0.16071583330631256, 'timestamp': '2025-09-10 02:51:27.218776', 'step': 13108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:27.272509', 'step': 13108, 'epoch': 2} {'type': 'loss', 'content': 0.12853427231311798, 'timestamp': '2025-09-10 02:51:27.274970', 'step': 13109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:27.329252', 'step': 13109, 'epoch': 2} {'type': 'loss', 'content': 0.10708638280630112, 'timestamp': '2025-09-10 02:51:27.331490', 'step': 13110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:27.385933', 'step': 13110, 'epoch': 2} {'type': 'loss', 'content': 0.08467940986156464, 'timestamp': '2025-09-10 02:51:27.388373', 'step': 13111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:27.443078', 'step': 13111, 'epoch': 2} {'type': 'loss', 'content': 0.025150712579488754, 'timestamp': '2025-09-10 02:51:27.449272', 'step': 13112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:27.503423', 'step': 13112, 'epoch': 2} {'type': 'loss', 'content': 0.08330338448286057, 'timestamp': '2025-09-10 02:51:27.505765', 'step': 13113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:27.561096', 'step': 13113, 'epoch': 2} {'type': 'loss', 'content': 0.16077837347984314, 'timestamp': '2025-09-10 02:51:27.562984', 'step': 13114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:27.619494', 'step': 13114, 'epoch': 2} {'type': 'loss', 'content': 0.13392332196235657, 'timestamp': '2025-09-10 02:51:27.621612', 'step': 13115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:27.675369', 'step': 13115, 'epoch': 2} {'type': 'loss', 'content': 0.1735461801290512, 'timestamp': '2025-09-10 02:51:27.681103', 'step': 13116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:27.734464', 'step': 13116, 'epoch': 2} {'type': 'loss', 'content': 0.094025619328022, 'timestamp': '2025-09-10 02:51:27.736710', 'step': 13117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:27.792192', 'step': 13117, 'epoch': 2} {'type': 'loss', 'content': 0.2438187301158905, 'timestamp': '2025-09-10 02:51:27.794300', 'step': 13118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:27.851663', 'step': 13118, 'epoch': 2} {'type': 'loss', 'content': 0.09196200221776962, 'timestamp': '2025-09-10 02:51:27.853846', 'step': 13119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:27.907902', 'step': 13119, 'epoch': 2} {'type': 'loss', 'content': 0.08215150982141495, 'timestamp': '2025-09-10 02:51:27.913960', 'step': 13120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:27.967442', 'step': 13120, 'epoch': 2} {'type': 'loss', 'content': 0.09104537218809128, 'timestamp': '2025-09-10 02:51:27.969773', 'step': 13121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:28.023495', 'step': 13121, 'epoch': 2} {'type': 'loss', 'content': 0.14906372129917145, 'timestamp': '2025-09-10 02:51:28.025935', 'step': 13122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:28.079943', 'step': 13122, 'epoch': 2} {'type': 'loss', 'content': 0.07782677561044693, 'timestamp': '2025-09-10 02:51:28.082238', 'step': 13123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:28.137403', 'step': 13123, 'epoch': 2} {'type': 'loss', 'content': 0.09929477423429489, 'timestamp': '2025-09-10 02:51:28.143530', 'step': 13124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:28.196678', 'step': 13124, 'epoch': 2} {'type': 'loss', 'content': 0.0836518257856369, 'timestamp': '2025-09-10 02:51:28.198847', 'step': 13125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:28.252463', 'step': 13125, 'epoch': 2} {'type': 'loss', 'content': 0.043006595224142075, 'timestamp': '2025-09-10 02:51:28.254847', 'step': 13126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:28.308754', 'step': 13126, 'epoch': 2} {'type': 'loss', 'content': 0.07848112285137177, 'timestamp': '2025-09-10 02:51:28.311112', 'step': 13127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:28.366021', 'step': 13127, 'epoch': 2} {'type': 'loss', 'content': 0.10059866309165955, 'timestamp': '2025-09-10 02:51:28.372456', 'step': 13128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:28.426174', 'step': 13128, 'epoch': 2} {'type': 'loss', 'content': 0.08046610653400421, 'timestamp': '2025-09-10 02:51:28.428426', 'step': 13129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:28.481983', 'step': 13129, 'epoch': 2} {'type': 'loss', 'content': 0.05621030554175377, 'timestamp': '2025-09-10 02:51:28.484290', 'step': 13130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:28.537911', 'step': 13130, 'epoch': 2} {'type': 'loss', 'content': 0.14454571902751923, 'timestamp': '2025-09-10 02:51:28.539981', 'step': 13131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:28.592692', 'step': 13131, 'epoch': 2} {'type': 'loss', 'content': 0.11952009052038193, 'timestamp': '2025-09-10 02:51:28.598471', 'step': 13132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:28.652901', 'step': 13132, 'epoch': 2} {'type': 'loss', 'content': 0.19307422637939453, 'timestamp': '2025-09-10 02:51:28.655185', 'step': 13133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:28.708270', 'step': 13133, 'epoch': 2} {'type': 'loss', 'content': 0.10067655146121979, 'timestamp': '2025-09-10 02:51:28.710585', 'step': 13134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:28.764750', 'step': 13134, 'epoch': 2} {'type': 'loss', 'content': 0.11157334595918655, 'timestamp': '2025-09-10 02:51:28.767095', 'step': 13135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:28.822237', 'step': 13135, 'epoch': 2} {'type': 'loss', 'content': 0.13484784960746765, 'timestamp': '2025-09-10 02:51:28.828570', 'step': 13136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:28.881636', 'step': 13136, 'epoch': 2} {'type': 'loss', 'content': 0.07656645774841309, 'timestamp': '2025-09-10 02:51:28.885203', 'step': 13137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:28.940231', 'step': 13137, 'epoch': 2} {'type': 'loss', 'content': 0.06650316715240479, 'timestamp': '2025-09-10 02:51:28.942414', 'step': 13138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:28.995901', 'step': 13138, 'epoch': 2} {'type': 'loss', 'content': 0.05978907272219658, 'timestamp': '2025-09-10 02:51:28.997980', 'step': 13139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:29.050687', 'step': 13139, 'epoch': 2} {'type': 'loss', 'content': 0.1081763431429863, 'timestamp': '2025-09-10 02:51:29.056802', 'step': 13140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:29.109508', 'step': 13140, 'epoch': 2} {'type': 'loss', 'content': 0.11099252104759216, 'timestamp': '2025-09-10 02:51:29.111323', 'step': 13141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:29.166203', 'step': 13141, 'epoch': 2} {'type': 'loss', 'content': 0.1077348068356514, 'timestamp': '2025-09-10 02:51:29.168027', 'step': 13142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:29.220584', 'step': 13142, 'epoch': 2} {'type': 'loss', 'content': 0.11820575594902039, 'timestamp': '2025-09-10 02:51:29.222452', 'step': 13143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:29.276006', 'step': 13143, 'epoch': 2} {'type': 'loss', 'content': 0.23884932696819305, 'timestamp': '2025-09-10 02:51:29.281781', 'step': 13144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:29.334853', 'step': 13144, 'epoch': 2} {'type': 'loss', 'content': 0.09282615035772324, 'timestamp': '2025-09-10 02:51:29.336654', 'step': 13145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:29.389706', 'step': 13145, 'epoch': 2} {'type': 'loss', 'content': 0.1491490751504898, 'timestamp': '2025-09-10 02:51:29.391928', 'step': 13146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:29.448313', 'step': 13146, 'epoch': 2} {'type': 'loss', 'content': 0.11590477079153061, 'timestamp': '2025-09-10 02:51:29.450691', 'step': 13147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:29.504300', 'step': 13147, 'epoch': 2} {'type': 'loss', 'content': 0.2103831171989441, 'timestamp': '2025-09-10 02:51:29.510420', 'step': 13148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:29.565219', 'step': 13148, 'epoch': 2} {'type': 'loss', 'content': 0.146406352519989, 'timestamp': '2025-09-10 02:51:29.567546', 'step': 13149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:29.621533', 'step': 13149, 'epoch': 2} {'type': 'loss', 'content': 0.17394159734249115, 'timestamp': '2025-09-10 02:51:29.623465', 'step': 13150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:29.677802', 'step': 13150, 'epoch': 2} {'type': 'loss', 'content': 0.1568722128868103, 'timestamp': '2025-09-10 02:51:29.680016', 'step': 13151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:29.733673', 'step': 13151, 'epoch': 2} {'type': 'loss', 'content': 0.09015855193138123, 'timestamp': '2025-09-10 02:51:29.739679', 'step': 13152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:29.793602', 'step': 13152, 'epoch': 2} {'type': 'loss', 'content': 0.10104558616876602, 'timestamp': '2025-09-10 02:51:29.795488', 'step': 13153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:29.848749', 'step': 13153, 'epoch': 2} {'type': 'loss', 'content': 0.20637695491313934, 'timestamp': '2025-09-10 02:51:29.850608', 'step': 13154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:29.904945', 'step': 13154, 'epoch': 2} {'type': 'loss', 'content': 0.15369102358818054, 'timestamp': '2025-09-10 02:51:29.906965', 'step': 13155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:29.960838', 'step': 13155, 'epoch': 2} {'type': 'loss', 'content': 0.2046489119529724, 'timestamp': '2025-09-10 02:51:29.967277', 'step': 13156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:30.020582', 'step': 13156, 'epoch': 2} {'type': 'loss', 'content': 0.06361259520053864, 'timestamp': '2025-09-10 02:51:30.022920', 'step': 13157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:30.076408', 'step': 13157, 'epoch': 2} {'type': 'loss', 'content': 0.13981623947620392, 'timestamp': '2025-09-10 02:51:30.078670', 'step': 13158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:30.133143', 'step': 13158, 'epoch': 2} {'type': 'loss', 'content': 0.07180741429328918, 'timestamp': '2025-09-10 02:51:30.135591', 'step': 13159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:30.188999', 'step': 13159, 'epoch': 2} {'type': 'loss', 'content': 0.045494403690099716, 'timestamp': '2025-09-10 02:51:30.195185', 'step': 13160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:30.249149', 'step': 13160, 'epoch': 2} {'type': 'loss', 'content': 0.14159536361694336, 'timestamp': '2025-09-10 02:51:30.251023', 'step': 13161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:30.305150', 'step': 13161, 'epoch': 2} {'type': 'loss', 'content': 0.07072307914495468, 'timestamp': '2025-09-10 02:51:30.307019', 'step': 13162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:30.361300', 'step': 13162, 'epoch': 2} {'type': 'loss', 'content': 0.13289035856723785, 'timestamp': '2025-09-10 02:51:30.363197', 'step': 13163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:30.417617', 'step': 13163, 'epoch': 2} {'type': 'loss', 'content': 0.09161587804555893, 'timestamp': '2025-09-10 02:51:30.423916', 'step': 13164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:30.479199', 'step': 13164, 'epoch': 2} {'type': 'loss', 'content': 0.12591548264026642, 'timestamp': '2025-09-10 02:51:30.481320', 'step': 13165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:30.535766', 'step': 13165, 'epoch': 2} {'type': 'loss', 'content': 0.14380702376365662, 'timestamp': '2025-09-10 02:51:30.537822', 'step': 13166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:30.592122', 'step': 13166, 'epoch': 2} {'type': 'loss', 'content': 0.12544280290603638, 'timestamp': '2025-09-10 02:51:30.594478', 'step': 13167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:30.648478', 'step': 13167, 'epoch': 2} {'type': 'loss', 'content': 0.10299768298864365, 'timestamp': '2025-09-10 02:51:30.654504', 'step': 13168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:30.707510', 'step': 13168, 'epoch': 2} {'type': 'loss', 'content': 0.16479229927062988, 'timestamp': '2025-09-10 02:51:30.709367', 'step': 13169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:30.763000', 'step': 13169, 'epoch': 2} {'type': 'loss', 'content': 0.12305016815662384, 'timestamp': '2025-09-10 02:51:30.765637', 'step': 13170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:30.820263', 'step': 13170, 'epoch': 2} {'type': 'loss', 'content': 0.10231149196624756, 'timestamp': '2025-09-10 02:51:30.822628', 'step': 13171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:30.876797', 'step': 13171, 'epoch': 2} {'type': 'loss', 'content': 0.044178105890750885, 'timestamp': '2025-09-10 02:51:30.882973', 'step': 13172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:30.937521', 'step': 13172, 'epoch': 2} {'type': 'loss', 'content': 0.062245745211839676, 'timestamp': '2025-09-10 02:51:30.939616', 'step': 13173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:30.994618', 'step': 13173, 'epoch': 2} {'type': 'loss', 'content': 0.12804096937179565, 'timestamp': '2025-09-10 02:51:30.997182', 'step': 13174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:31.052059', 'step': 13174, 'epoch': 2} {'type': 'loss', 'content': 0.1194189041852951, 'timestamp': '2025-09-10 02:51:31.054596', 'step': 13175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:31.108984', 'step': 13175, 'epoch': 2} {'type': 'loss', 'content': 0.0812583863735199, 'timestamp': '2025-09-10 02:51:31.114930', 'step': 13176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:31.169164', 'step': 13176, 'epoch': 2} {'type': 'loss', 'content': 0.1459026038646698, 'timestamp': '2025-09-10 02:51:31.171098', 'step': 13177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:31.226569', 'step': 13177, 'epoch': 2} {'type': 'loss', 'content': 0.09038744121789932, 'timestamp': '2025-09-10 02:51:31.228536', 'step': 13178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:31.282250', 'step': 13178, 'epoch': 2} {'type': 'loss', 'content': 0.06811968237161636, 'timestamp': '2025-09-10 02:51:31.284317', 'step': 13179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:31.337777', 'step': 13179, 'epoch': 2} {'type': 'loss', 'content': 0.16054639220237732, 'timestamp': '2025-09-10 02:51:31.344012', 'step': 13180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:31.397678', 'step': 13180, 'epoch': 2} {'type': 'loss', 'content': 0.06225798651576042, 'timestamp': '2025-09-10 02:51:31.400011', 'step': 13181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:31.454169', 'step': 13181, 'epoch': 2} {'type': 'loss', 'content': 0.125879168510437, 'timestamp': '2025-09-10 02:51:31.456307', 'step': 13182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:31.510099', 'step': 13182, 'epoch': 2} {'type': 'loss', 'content': 0.13932906091213226, 'timestamp': '2025-09-10 02:51:31.512229', 'step': 13183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:31.565711', 'step': 13183, 'epoch': 2} {'type': 'loss', 'content': 0.06041083112359047, 'timestamp': '2025-09-10 02:51:31.571997', 'step': 13184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:31.624809', 'step': 13184, 'epoch': 2} {'type': 'loss', 'content': 0.08903174847364426, 'timestamp': '2025-09-10 02:51:31.626892', 'step': 13185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:31.680284', 'step': 13185, 'epoch': 2} {'type': 'loss', 'content': 0.1755317598581314, 'timestamp': '2025-09-10 02:51:31.682182', 'step': 13186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:31.735516', 'step': 13186, 'epoch': 2} {'type': 'loss', 'content': 0.10108791291713715, 'timestamp': '2025-09-10 02:51:31.737594', 'step': 13187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:31.791728', 'step': 13187, 'epoch': 2} {'type': 'loss', 'content': 0.074874147772789, 'timestamp': '2025-09-10 02:51:31.797584', 'step': 13188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:31.851696', 'step': 13188, 'epoch': 2} {'type': 'loss', 'content': 0.09201905131340027, 'timestamp': '2025-09-10 02:51:31.853921', 'step': 13189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:31.907356', 'step': 13189, 'epoch': 2} {'type': 'loss', 'content': 0.14761847257614136, 'timestamp': '2025-09-10 02:51:31.909622', 'step': 13190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:31.962969', 'step': 13190, 'epoch': 2} {'type': 'loss', 'content': 0.07646115124225616, 'timestamp': '2025-09-10 02:51:31.965420', 'step': 13191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:32.018546', 'step': 13191, 'epoch': 2} {'type': 'loss', 'content': 0.08907110244035721, 'timestamp': '2025-09-10 02:51:32.024628', 'step': 13192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:32.077390', 'step': 13192, 'epoch': 2} {'type': 'loss', 'content': 0.08985667675733566, 'timestamp': '2025-09-10 02:51:32.079443', 'step': 13193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:32.133247', 'step': 13193, 'epoch': 2} {'type': 'loss', 'content': 0.06609770655632019, 'timestamp': '2025-09-10 02:51:32.135306', 'step': 13194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:32.190908', 'step': 13194, 'epoch': 2} {'type': 'loss', 'content': 0.10735396295785904, 'timestamp': '2025-09-10 02:51:32.193108', 'step': 13195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:32.259725', 'step': 13195, 'epoch': 2} {'type': 'loss', 'content': 0.14682140946388245, 'timestamp': '2025-09-10 02:51:32.265681', 'step': 13196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:32.320023', 'step': 13196, 'epoch': 2} {'type': 'loss', 'content': 0.13652929663658142, 'timestamp': '2025-09-10 02:51:32.322203', 'step': 13197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:32.377267', 'step': 13197, 'epoch': 2} {'type': 'loss', 'content': 0.09039830416440964, 'timestamp': '2025-09-10 02:51:32.379649', 'step': 13198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:32.434344', 'step': 13198, 'epoch': 2} {'type': 'loss', 'content': 0.11533413082361221, 'timestamp': '2025-09-10 02:51:32.436552', 'step': 13199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:32.491732', 'step': 13199, 'epoch': 2} {'type': 'loss', 'content': 0.12201490253210068, 'timestamp': '2025-09-10 02:51:32.498113', 'step': 13200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:32.552607', 'step': 13200, 'epoch': 2} {'type': 'loss', 'content': 0.08303654938936234, 'timestamp': '2025-09-10 02:51:32.554769', 'step': 13201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:32.609706', 'step': 13201, 'epoch': 2} {'type': 'loss', 'content': 0.12647168338298798, 'timestamp': '2025-09-10 02:51:32.611680', 'step': 13202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:32.667345', 'step': 13202, 'epoch': 2} {'type': 'loss', 'content': 0.10807067900896072, 'timestamp': '2025-09-10 02:51:32.669306', 'step': 13203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:32.723122', 'step': 13203, 'epoch': 2} {'type': 'loss', 'content': 0.13197971880435944, 'timestamp': '2025-09-10 02:51:32.729127', 'step': 13204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:32.783763', 'step': 13204, 'epoch': 2} {'type': 'loss', 'content': 0.0797516480088234, 'timestamp': '2025-09-10 02:51:32.785906', 'step': 13205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:32.840606', 'step': 13205, 'epoch': 2} {'type': 'loss', 'content': 0.16864241659641266, 'timestamp': '2025-09-10 02:51:32.842697', 'step': 13206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:32.897066', 'step': 13206, 'epoch': 2} {'type': 'loss', 'content': 0.09664298593997955, 'timestamp': '2025-09-10 02:51:32.899386', 'step': 13207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:32.952940', 'step': 13207, 'epoch': 2} {'type': 'loss', 'content': 0.09474457055330276, 'timestamp': '2025-09-10 02:51:32.959431', 'step': 13208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:33.013464', 'step': 13208, 'epoch': 2} {'type': 'loss', 'content': 0.15467765927314758, 'timestamp': '2025-09-10 02:51:33.016007', 'step': 13209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:33.069751', 'step': 13209, 'epoch': 2} {'type': 'loss', 'content': 0.10294554382562637, 'timestamp': '2025-09-10 02:51:33.072085', 'step': 13210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:33.126495', 'step': 13210, 'epoch': 2} {'type': 'loss', 'content': 0.12602578103542328, 'timestamp': '2025-09-10 02:51:33.128437', 'step': 13211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:33.182783', 'step': 13211, 'epoch': 2} {'type': 'loss', 'content': 0.04914648458361626, 'timestamp': '2025-09-10 02:51:33.188649', 'step': 13212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:33.243077', 'step': 13212, 'epoch': 2} {'type': 'loss', 'content': 0.046438686549663544, 'timestamp': '2025-09-10 02:51:33.245474', 'step': 13213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:33.300317', 'step': 13213, 'epoch': 2} {'type': 'loss', 'content': 0.07039676606655121, 'timestamp': '2025-09-10 02:51:33.302572', 'step': 13214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:33.356233', 'step': 13214, 'epoch': 2} {'type': 'loss', 'content': 0.11682698130607605, 'timestamp': '2025-09-10 02:51:33.358537', 'step': 13215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:33.411786', 'step': 13215, 'epoch': 2} {'type': 'loss', 'content': 0.08150990307331085, 'timestamp': '2025-09-10 02:51:33.418068', 'step': 13216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:33.470966', 'step': 13216, 'epoch': 2} {'type': 'loss', 'content': 0.13112005591392517, 'timestamp': '2025-09-10 02:51:33.473306', 'step': 13217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:33.527139', 'step': 13217, 'epoch': 2} {'type': 'loss', 'content': 0.1840858906507492, 'timestamp': '2025-09-10 02:51:33.529190', 'step': 13218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:33.584555', 'step': 13218, 'epoch': 2} {'type': 'loss', 'content': 0.08983553946018219, 'timestamp': '2025-09-10 02:51:33.586525', 'step': 13219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:33.640851', 'step': 13219, 'epoch': 2} {'type': 'loss', 'content': 0.0984383076429367, 'timestamp': '2025-09-10 02:51:33.647258', 'step': 13220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:33.701247', 'step': 13220, 'epoch': 2} {'type': 'loss', 'content': 0.11716552823781967, 'timestamp': '2025-09-10 02:51:33.703605', 'step': 13221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:33.758644', 'step': 13221, 'epoch': 2} {'type': 'loss', 'content': 0.13766509294509888, 'timestamp': '2025-09-10 02:51:33.760831', 'step': 13222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:33.816924', 'step': 13222, 'epoch': 2} {'type': 'loss', 'content': 0.052262868732213974, 'timestamp': '2025-09-10 02:51:33.819200', 'step': 13223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:33.873263', 'step': 13223, 'epoch': 2} {'type': 'loss', 'content': 0.08251246809959412, 'timestamp': '2025-09-10 02:51:33.879190', 'step': 13224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:33.934067', 'step': 13224, 'epoch': 2} {'type': 'loss', 'content': 0.23285558819770813, 'timestamp': '2025-09-10 02:51:33.935887', 'step': 13225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:33.990546', 'step': 13225, 'epoch': 2} {'type': 'loss', 'content': 0.1446467489004135, 'timestamp': '2025-09-10 02:51:33.993688', 'step': 13226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:34.049235', 'step': 13226, 'epoch': 2} {'type': 'loss', 'content': 0.13971932232379913, 'timestamp': '2025-09-10 02:51:34.051351', 'step': 13227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:34.105510', 'step': 13227, 'epoch': 2} {'type': 'loss', 'content': 0.13430647552013397, 'timestamp': '2025-09-10 02:51:34.111264', 'step': 13228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:34.166199', 'step': 13228, 'epoch': 2} {'type': 'loss', 'content': 0.10439647734165192, 'timestamp': '2025-09-10 02:51:34.168384', 'step': 13229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:34.222733', 'step': 13229, 'epoch': 2} {'type': 'loss', 'content': 0.1010880395770073, 'timestamp': '2025-09-10 02:51:34.225111', 'step': 13230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:34.280225', 'step': 13230, 'epoch': 2} {'type': 'loss', 'content': 0.18468384444713593, 'timestamp': '2025-09-10 02:51:34.282570', 'step': 13231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:34.336075', 'step': 13231, 'epoch': 2} {'type': 'loss', 'content': 0.23951084911823273, 'timestamp': '2025-09-10 02:51:34.342525', 'step': 13232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:34.397556', 'step': 13232, 'epoch': 2} {'type': 'loss', 'content': 0.12126610428094864, 'timestamp': '2025-09-10 02:51:34.399744', 'step': 13233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:34.453899', 'step': 13233, 'epoch': 2} {'type': 'loss', 'content': 0.09400943666696548, 'timestamp': '2025-09-10 02:51:34.456182', 'step': 13234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:34.510936', 'step': 13234, 'epoch': 2} {'type': 'loss', 'content': 0.1399238258600235, 'timestamp': '2025-09-10 02:51:34.513050', 'step': 13235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:34.568256', 'step': 13235, 'epoch': 2} {'type': 'loss', 'content': 0.08803296834230423, 'timestamp': '2025-09-10 02:51:34.574457', 'step': 13236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:34.629053', 'step': 13236, 'epoch': 2} {'type': 'loss', 'content': 0.0887850821018219, 'timestamp': '2025-09-10 02:51:34.631076', 'step': 13237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:34.684885', 'step': 13237, 'epoch': 2} {'type': 'loss', 'content': 0.12665820121765137, 'timestamp': '2025-09-10 02:51:34.687232', 'step': 13238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:34.741324', 'step': 13238, 'epoch': 2} {'type': 'loss', 'content': 0.13800017535686493, 'timestamp': '2025-09-10 02:51:34.743637', 'step': 13239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:34.797304', 'step': 13239, 'epoch': 2} {'type': 'loss', 'content': 0.15812289714813232, 'timestamp': '2025-09-10 02:51:34.803660', 'step': 13240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:34.856726', 'step': 13240, 'epoch': 2} {'type': 'loss', 'content': 0.1288989782333374, 'timestamp': '2025-09-10 02:51:34.859204', 'step': 13241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:34.914835', 'step': 13241, 'epoch': 2} {'type': 'loss', 'content': 0.052616722881793976, 'timestamp': '2025-09-10 02:51:34.916973', 'step': 13242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:34.969446', 'step': 13242, 'epoch': 2} {'type': 'loss', 'content': 0.16298526525497437, 'timestamp': '2025-09-10 02:51:34.971499', 'step': 13243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:35.025083', 'step': 13243, 'epoch': 2} {'type': 'loss', 'content': 0.07632829248905182, 'timestamp': '2025-09-10 02:51:35.031546', 'step': 13244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:35.089301', 'step': 13244, 'epoch': 2} {'type': 'loss', 'content': 0.18707461655139923, 'timestamp': '2025-09-10 02:51:35.091624', 'step': 13245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:35.151919', 'step': 13245, 'epoch': 2} {'type': 'loss', 'content': 0.18646356463432312, 'timestamp': '2025-09-10 02:51:35.154144', 'step': 13246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:35.206712', 'step': 13246, 'epoch': 2} {'type': 'loss', 'content': 0.15225514769554138, 'timestamp': '2025-09-10 02:51:35.208789', 'step': 13247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:35.267171', 'step': 13247, 'epoch': 2} {'type': 'loss', 'content': 0.0752529501914978, 'timestamp': '2025-09-10 02:51:35.273313', 'step': 13248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:35.326589', 'step': 13248, 'epoch': 2} {'type': 'loss', 'content': 0.13121481239795685, 'timestamp': '2025-09-10 02:51:35.328845', 'step': 13249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:35.387682', 'step': 13249, 'epoch': 2} {'type': 'loss', 'content': 0.15346387028694153, 'timestamp': '2025-09-10 02:51:35.389881', 'step': 13250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:35.443869', 'step': 13250, 'epoch': 2} {'type': 'loss', 'content': 0.10246966034173965, 'timestamp': '2025-09-10 02:51:35.446327', 'step': 13251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:35.499291', 'step': 13251, 'epoch': 2} {'type': 'loss', 'content': 0.08865603804588318, 'timestamp': '2025-09-10 02:51:35.508870', 'step': 13252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:35.560941', 'step': 13252, 'epoch': 2} {'type': 'loss', 'content': 0.10671892017126083, 'timestamp': '2025-09-10 02:51:35.563198', 'step': 13253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:35.617195', 'step': 13253, 'epoch': 2} {'type': 'loss', 'content': 0.13839900493621826, 'timestamp': '2025-09-10 02:51:35.619561', 'step': 13254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:35.683093', 'step': 13254, 'epoch': 2} {'type': 'loss', 'content': 0.11742766201496124, 'timestamp': '2025-09-10 02:51:35.686406', 'step': 13255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:35.738395', 'step': 13255, 'epoch': 2} {'type': 'loss', 'content': 0.13656720519065857, 'timestamp': '2025-09-10 02:51:35.744203', 'step': 13256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:35.801263', 'step': 13256, 'epoch': 2} {'type': 'loss', 'content': 0.10785625129938126, 'timestamp': '2025-09-10 02:51:35.806516', 'step': 13257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:35.860735', 'step': 13257, 'epoch': 2} {'type': 'loss', 'content': 0.055259980261325836, 'timestamp': '2025-09-10 02:51:35.862679', 'step': 13258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:35.915646', 'step': 13258, 'epoch': 2} {'type': 'loss', 'content': 0.07755151391029358, 'timestamp': '2025-09-10 02:51:35.917669', 'step': 13259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:35.969841', 'step': 13259, 'epoch': 2} {'type': 'loss', 'content': 0.08176710456609726, 'timestamp': '2025-09-10 02:51:35.977128', 'step': 13260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:36.029256', 'step': 13260, 'epoch': 2} {'type': 'loss', 'content': 0.24125050008296967, 'timestamp': '2025-09-10 02:51:36.031342', 'step': 13261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:36.087385', 'step': 13261, 'epoch': 2} {'type': 'loss', 'content': 0.17393164336681366, 'timestamp': '2025-09-10 02:51:36.089283', 'step': 13262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:36.145974', 'step': 13262, 'epoch': 2} {'type': 'loss', 'content': 0.02900436706840992, 'timestamp': '2025-09-10 02:51:36.148193', 'step': 13263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:36.200158', 'step': 13263, 'epoch': 2} {'type': 'loss', 'content': 0.08725559711456299, 'timestamp': '2025-09-10 02:51:36.206100', 'step': 13264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:36.264102', 'step': 13264, 'epoch': 2} {'type': 'loss', 'content': 0.055250752717256546, 'timestamp': '2025-09-10 02:51:36.266356', 'step': 13265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:36.318791', 'step': 13265, 'epoch': 2} {'type': 'loss', 'content': 0.1621236503124237, 'timestamp': '2025-09-10 02:51:36.321158', 'step': 13266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:36.373890', 'step': 13266, 'epoch': 2} {'type': 'loss', 'content': 0.11363624036312103, 'timestamp': '2025-09-10 02:51:36.376519', 'step': 13267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:36.430532', 'step': 13267, 'epoch': 2} {'type': 'loss', 'content': 0.14450354874134064, 'timestamp': '2025-09-10 02:51:36.436910', 'step': 13268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:36.490447', 'step': 13268, 'epoch': 2} {'type': 'loss', 'content': 0.10993663221597672, 'timestamp': '2025-09-10 02:51:36.492546', 'step': 13269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:36.547660', 'step': 13269, 'epoch': 2} {'type': 'loss', 'content': 0.08122991770505905, 'timestamp': '2025-09-10 02:51:36.550828', 'step': 13270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:36.606098', 'step': 13270, 'epoch': 2} {'type': 'loss', 'content': 0.054134804755449295, 'timestamp': '2025-09-10 02:51:36.611355', 'step': 13271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:36.667093', 'step': 13271, 'epoch': 2} {'type': 'loss', 'content': 0.10702367126941681, 'timestamp': '2025-09-10 02:51:36.673667', 'step': 13272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:36.727444', 'step': 13272, 'epoch': 2} {'type': 'loss', 'content': 0.15841153264045715, 'timestamp': '2025-09-10 02:51:36.729554', 'step': 13273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:36.783768', 'step': 13273, 'epoch': 2} {'type': 'loss', 'content': 0.11924603581428528, 'timestamp': '2025-09-10 02:51:36.785860', 'step': 13274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:36.840555', 'step': 13274, 'epoch': 2} {'type': 'loss', 'content': 0.12383640557527542, 'timestamp': '2025-09-10 02:51:36.843519', 'step': 13275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:36.900743', 'step': 13275, 'epoch': 2} {'type': 'loss', 'content': 0.10052311420440674, 'timestamp': '2025-09-10 02:51:36.906978', 'step': 13276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:36.980145', 'step': 13276, 'epoch': 2} {'type': 'loss', 'content': 0.07296475768089294, 'timestamp': '2025-09-10 02:51:36.982209', 'step': 13277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:37.036022', 'step': 13277, 'epoch': 2} {'type': 'loss', 'content': 0.12640278041362762, 'timestamp': '2025-09-10 02:51:37.041485', 'step': 13278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:37.096795', 'step': 13278, 'epoch': 2} {'type': 'loss', 'content': 0.030170267447829247, 'timestamp': '2025-09-10 02:51:37.099053', 'step': 13279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:37.152558', 'step': 13279, 'epoch': 2} {'type': 'loss', 'content': 0.16139261424541473, 'timestamp': '2025-09-10 02:51:37.158865', 'step': 13280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:37.211402', 'step': 13280, 'epoch': 2} {'type': 'loss', 'content': 0.11873079836368561, 'timestamp': '2025-09-10 02:51:37.216651', 'step': 13281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:37.279285', 'step': 13281, 'epoch': 2} {'type': 'loss', 'content': 0.19995397329330444, 'timestamp': '2025-09-10 02:51:37.281409', 'step': 13282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:37.348513', 'step': 13282, 'epoch': 2} {'type': 'loss', 'content': 0.10113584250211716, 'timestamp': '2025-09-10 02:51:37.351007', 'step': 13283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:37.412206', 'step': 13283, 'epoch': 2} {'type': 'loss', 'content': 0.0926629975438118, 'timestamp': '2025-09-10 02:51:37.419626', 'step': 13284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:37.485869', 'step': 13284, 'epoch': 2} {'type': 'loss', 'content': 0.10455624759197235, 'timestamp': '2025-09-10 02:51:37.488705', 'step': 13285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:37.549341', 'step': 13285, 'epoch': 2} {'type': 'loss', 'content': 0.1645566076040268, 'timestamp': '2025-09-10 02:51:37.551750', 'step': 13286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:37.611412', 'step': 13286, 'epoch': 2} {'type': 'loss', 'content': 0.09031565487384796, 'timestamp': '2025-09-10 02:51:37.614001', 'step': 13287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:37.674409', 'step': 13287, 'epoch': 2} {'type': 'loss', 'content': 0.07379716634750366, 'timestamp': '2025-09-10 02:51:37.680937', 'step': 13288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:37.735007', 'step': 13288, 'epoch': 2} {'type': 'loss', 'content': 0.08885440230369568, 'timestamp': '2025-09-10 02:51:37.737174', 'step': 13289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:37.792881', 'step': 13289, 'epoch': 2} {'type': 'loss', 'content': 0.12364344298839569, 'timestamp': '2025-09-10 02:51:37.795119', 'step': 13290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:37.847781', 'step': 13290, 'epoch': 2} {'type': 'loss', 'content': 0.15042759478092194, 'timestamp': '2025-09-10 02:51:37.850132', 'step': 13291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:37.903491', 'step': 13291, 'epoch': 2} {'type': 'loss', 'content': 0.09002397209405899, 'timestamp': '2025-09-10 02:51:37.909162', 'step': 13292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:37.963097', 'step': 13292, 'epoch': 2} {'type': 'loss', 'content': 0.15658150613307953, 'timestamp': '2025-09-10 02:51:37.965374', 'step': 13293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:38.018418', 'step': 13293, 'epoch': 2} {'type': 'loss', 'content': 0.11062849313020706, 'timestamp': '2025-09-10 02:51:38.020691', 'step': 13294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:38.075800', 'step': 13294, 'epoch': 2} {'type': 'loss', 'content': 0.030403897166252136, 'timestamp': '2025-09-10 02:51:38.078214', 'step': 13295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:38.131385', 'step': 13295, 'epoch': 2} {'type': 'loss', 'content': 0.13870270550251007, 'timestamp': '2025-09-10 02:51:38.137348', 'step': 13296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:38.189956', 'step': 13296, 'epoch': 2} {'type': 'loss', 'content': 0.1205853596329689, 'timestamp': '2025-09-10 02:51:38.192188', 'step': 13297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:38.244712', 'step': 13297, 'epoch': 2} {'type': 'loss', 'content': 0.09949243813753128, 'timestamp': '2025-09-10 02:51:38.247303', 'step': 13298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:38.302070', 'step': 13298, 'epoch': 2} {'type': 'loss', 'content': 0.09014734625816345, 'timestamp': '2025-09-10 02:51:38.304704', 'step': 13299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:38.358747', 'step': 13299, 'epoch': 2} {'type': 'loss', 'content': 0.10779020935297012, 'timestamp': '2025-09-10 02:51:38.364731', 'step': 13300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:38.417579', 'step': 13300, 'epoch': 2} {'type': 'loss', 'content': 0.07399938255548477, 'timestamp': '2025-09-10 02:51:38.419823', 'step': 13301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:38.472666', 'step': 13301, 'epoch': 2} {'type': 'loss', 'content': 0.15227022767066956, 'timestamp': '2025-09-10 02:51:38.474945', 'step': 13302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:38.528612', 'step': 13302, 'epoch': 2} {'type': 'loss', 'content': 0.05365936458110809, 'timestamp': '2025-09-10 02:51:38.530959', 'step': 13303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:38.585593', 'step': 13303, 'epoch': 2} {'type': 'loss', 'content': 0.09212712943553925, 'timestamp': '2025-09-10 02:51:38.591444', 'step': 13304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:38.645083', 'step': 13304, 'epoch': 2} {'type': 'loss', 'content': 0.13227112591266632, 'timestamp': '2025-09-10 02:51:38.647309', 'step': 13305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:38.700477', 'step': 13305, 'epoch': 2} {'type': 'loss', 'content': 0.1419856995344162, 'timestamp': '2025-09-10 02:51:38.702782', 'step': 13306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:38.756066', 'step': 13306, 'epoch': 2} {'type': 'loss', 'content': 0.1876821517944336, 'timestamp': '2025-09-10 02:51:38.758677', 'step': 13307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:38.814703', 'step': 13307, 'epoch': 2} {'type': 'loss', 'content': 0.09509194642305374, 'timestamp': '2025-09-10 02:51:38.820761', 'step': 13308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:38.873311', 'step': 13308, 'epoch': 2} {'type': 'loss', 'content': 0.10713041573762894, 'timestamp': '2025-09-10 02:51:38.875765', 'step': 13309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:38.929647', 'step': 13309, 'epoch': 2} {'type': 'loss', 'content': 0.2090504914522171, 'timestamp': '2025-09-10 02:51:38.932053', 'step': 13310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:38.986605', 'step': 13310, 'epoch': 2} {'type': 'loss', 'content': 0.13510705530643463, 'timestamp': '2025-09-10 02:51:38.988960', 'step': 13311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:39.042429', 'step': 13311, 'epoch': 2} {'type': 'loss', 'content': 0.08754836767911911, 'timestamp': '2025-09-10 02:51:39.048579', 'step': 13312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:39.102032', 'step': 13312, 'epoch': 2} {'type': 'loss', 'content': 0.10812640190124512, 'timestamp': '2025-09-10 02:51:39.104405', 'step': 13313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:39.157715', 'step': 13313, 'epoch': 2} {'type': 'loss', 'content': 0.08461083471775055, 'timestamp': '2025-09-10 02:51:39.160051', 'step': 13314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:39.213607', 'step': 13314, 'epoch': 2} {'type': 'loss', 'content': 0.15750055015087128, 'timestamp': '2025-09-10 02:51:39.215890', 'step': 13315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:39.270007', 'step': 13315, 'epoch': 2} {'type': 'loss', 'content': 0.10783683508634567, 'timestamp': '2025-09-10 02:51:39.275773', 'step': 13316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:39.328451', 'step': 13316, 'epoch': 2} {'type': 'loss', 'content': 0.14445029199123383, 'timestamp': '2025-09-10 02:51:39.330813', 'step': 13317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:39.385032', 'step': 13317, 'epoch': 2} {'type': 'loss', 'content': 0.1230967566370964, 'timestamp': '2025-09-10 02:51:39.387465', 'step': 13318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:39.440458', 'step': 13318, 'epoch': 2} {'type': 'loss', 'content': 0.1527709811925888, 'timestamp': '2025-09-10 02:51:39.442716', 'step': 13319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:39.497551', 'step': 13319, 'epoch': 2} {'type': 'loss', 'content': 0.1859361082315445, 'timestamp': '2025-09-10 02:51:39.503480', 'step': 13320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:39.556480', 'step': 13320, 'epoch': 2} {'type': 'loss', 'content': 0.12191180139780045, 'timestamp': '2025-09-10 02:51:39.558729', 'step': 13321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:39.612291', 'step': 13321, 'epoch': 2} {'type': 'loss', 'content': 0.12578774988651276, 'timestamp': '2025-09-10 02:51:39.614410', 'step': 13322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:39.667612', 'step': 13322, 'epoch': 2} {'type': 'loss', 'content': 0.10021591186523438, 'timestamp': '2025-09-10 02:51:39.669712', 'step': 13323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:39.722916', 'step': 13323, 'epoch': 2} {'type': 'loss', 'content': 0.07156619429588318, 'timestamp': '2025-09-10 02:51:39.729183', 'step': 13324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:39.781288', 'step': 13324, 'epoch': 2} {'type': 'loss', 'content': 0.11582860350608826, 'timestamp': '2025-09-10 02:51:39.783584', 'step': 13325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:39.837255', 'step': 13325, 'epoch': 2} {'type': 'loss', 'content': 0.1379958689212799, 'timestamp': '2025-09-10 02:51:39.839585', 'step': 13326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:39.893304', 'step': 13326, 'epoch': 2} {'type': 'loss', 'content': 0.1217096820473671, 'timestamp': '2025-09-10 02:51:39.895645', 'step': 13327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:39.948564', 'step': 13327, 'epoch': 2} {'type': 'loss', 'content': 0.1156517043709755, 'timestamp': '2025-09-10 02:51:39.954503', 'step': 13328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:40.007552', 'step': 13328, 'epoch': 2} {'type': 'loss', 'content': 0.09907057881355286, 'timestamp': '2025-09-10 02:51:40.009729', 'step': 13329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:40.063236', 'step': 13329, 'epoch': 2} {'type': 'loss', 'content': 0.16456490755081177, 'timestamp': '2025-09-10 02:51:40.065623', 'step': 13330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:40.118777', 'step': 13330, 'epoch': 2} {'type': 'loss', 'content': 0.09684652090072632, 'timestamp': '2025-09-10 02:51:40.121117', 'step': 13331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:40.174841', 'step': 13331, 'epoch': 2} {'type': 'loss', 'content': 0.08272501826286316, 'timestamp': '2025-09-10 02:51:40.180824', 'step': 13332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:40.233950', 'step': 13332, 'epoch': 2} {'type': 'loss', 'content': 0.11890367418527603, 'timestamp': '2025-09-10 02:51:40.236189', 'step': 13333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:40.290460', 'step': 13333, 'epoch': 2} {'type': 'loss', 'content': 0.047309037297964096, 'timestamp': '2025-09-10 02:51:40.292791', 'step': 13334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:40.345627', 'step': 13334, 'epoch': 2} {'type': 'loss', 'content': 0.1255059540271759, 'timestamp': '2025-09-10 02:51:40.347981', 'step': 13335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:40.401616', 'step': 13335, 'epoch': 2} {'type': 'loss', 'content': 0.08153712749481201, 'timestamp': '2025-09-10 02:51:40.407697', 'step': 13336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:40.461392', 'step': 13336, 'epoch': 2} {'type': 'loss', 'content': 0.0688091441988945, 'timestamp': '2025-09-10 02:51:40.463836', 'step': 13337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:40.518186', 'step': 13337, 'epoch': 2} {'type': 'loss', 'content': 0.06928475946187973, 'timestamp': '2025-09-10 02:51:40.520672', 'step': 13338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:40.574375', 'step': 13338, 'epoch': 2} {'type': 'loss', 'content': 0.12541340291500092, 'timestamp': '2025-09-10 02:51:40.576733', 'step': 13339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:40.631060', 'step': 13339, 'epoch': 2} {'type': 'loss', 'content': 0.12288139760494232, 'timestamp': '2025-09-10 02:51:40.637170', 'step': 13340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:40.690431', 'step': 13340, 'epoch': 2} {'type': 'loss', 'content': 0.1883232295513153, 'timestamp': '2025-09-10 02:51:40.692502', 'step': 13341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:40.744956', 'step': 13341, 'epoch': 2} {'type': 'loss', 'content': 0.11161062866449356, 'timestamp': '2025-09-10 02:51:40.747357', 'step': 13342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:40.801854', 'step': 13342, 'epoch': 2} {'type': 'loss', 'content': 0.15014562010765076, 'timestamp': '2025-09-10 02:51:40.804279', 'step': 13343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:40.857241', 'step': 13343, 'epoch': 2} {'type': 'loss', 'content': 0.13829366862773895, 'timestamp': '2025-09-10 02:51:40.863270', 'step': 13344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:40.915549', 'step': 13344, 'epoch': 2} {'type': 'loss', 'content': 0.043394554406404495, 'timestamp': '2025-09-10 02:51:40.917870', 'step': 13345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:40.972201', 'step': 13345, 'epoch': 2} {'type': 'loss', 'content': 0.1664198487997055, 'timestamp': '2025-09-10 02:51:40.974717', 'step': 13346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:41.027986', 'step': 13346, 'epoch': 2} {'type': 'loss', 'content': 0.08412464708089828, 'timestamp': '2025-09-10 02:51:41.030203', 'step': 13347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:41.083605', 'step': 13347, 'epoch': 2} {'type': 'loss', 'content': 0.13837337493896484, 'timestamp': '2025-09-10 02:51:41.089590', 'step': 13348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:41.141764', 'step': 13348, 'epoch': 2} {'type': 'loss', 'content': 0.23924820125102997, 'timestamp': '2025-09-10 02:51:41.144403', 'step': 13349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:41.198172', 'step': 13349, 'epoch': 2} {'type': 'loss', 'content': 0.09888173639774323, 'timestamp': '2025-09-10 02:51:41.200625', 'step': 13350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:41.254292', 'step': 13350, 'epoch': 2} {'type': 'loss', 'content': 0.13426749408245087, 'timestamp': '2025-09-10 02:51:41.256683', 'step': 13351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:41.310464', 'step': 13351, 'epoch': 2} {'type': 'loss', 'content': 0.07607658952474594, 'timestamp': '2025-09-10 02:51:41.316738', 'step': 13352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:41.369512', 'step': 13352, 'epoch': 2} {'type': 'loss', 'content': 0.08687146753072739, 'timestamp': '2025-09-10 02:51:41.371913', 'step': 13353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:41.424783', 'step': 13353, 'epoch': 2} {'type': 'loss', 'content': 0.06728176027536392, 'timestamp': '2025-09-10 02:51:41.427307', 'step': 13354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:41.480290', 'step': 13354, 'epoch': 2} {'type': 'loss', 'content': 0.14899225533008575, 'timestamp': '2025-09-10 02:51:41.482621', 'step': 13355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:41.535562', 'step': 13355, 'epoch': 2} {'type': 'loss', 'content': 0.14677464962005615, 'timestamp': '2025-09-10 02:51:41.541541', 'step': 13356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:41.594417', 'step': 13356, 'epoch': 2} {'type': 'loss', 'content': 0.13843132555484772, 'timestamp': '2025-09-10 02:51:41.596592', 'step': 13357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:41.649493', 'step': 13357, 'epoch': 2} {'type': 'loss', 'content': 0.0930701494216919, 'timestamp': '2025-09-10 02:51:41.651823', 'step': 13358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:41.705143', 'step': 13358, 'epoch': 2} {'type': 'loss', 'content': 0.050879839807748795, 'timestamp': '2025-09-10 02:51:41.707504', 'step': 13359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:41.761907', 'step': 13359, 'epoch': 2} {'type': 'loss', 'content': 0.15213008224964142, 'timestamp': '2025-09-10 02:51:41.767955', 'step': 13360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:41.820658', 'step': 13360, 'epoch': 2} {'type': 'loss', 'content': 0.0996594950556755, 'timestamp': '2025-09-10 02:51:41.823125', 'step': 13361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:41.875890', 'step': 13361, 'epoch': 2} {'type': 'loss', 'content': 0.0720968246459961, 'timestamp': '2025-09-10 02:51:41.878292', 'step': 13362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:41.931293', 'step': 13362, 'epoch': 2} {'type': 'loss', 'content': 0.07759588211774826, 'timestamp': '2025-09-10 02:51:41.933666', 'step': 13363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:41.986773', 'step': 13363, 'epoch': 2} {'type': 'loss', 'content': 0.1901385486125946, 'timestamp': '2025-09-10 02:51:41.992672', 'step': 13364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:42.045534', 'step': 13364, 'epoch': 2} {'type': 'loss', 'content': 0.1479579359292984, 'timestamp': '2025-09-10 02:51:42.047801', 'step': 13365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:42.100802', 'step': 13365, 'epoch': 2} {'type': 'loss', 'content': 0.13425196707248688, 'timestamp': '2025-09-10 02:51:42.103225', 'step': 13366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:42.157010', 'step': 13366, 'epoch': 2} {'type': 'loss', 'content': 0.14290191233158112, 'timestamp': '2025-09-10 02:51:42.159460', 'step': 13367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:42.212852', 'step': 13367, 'epoch': 2} {'type': 'loss', 'content': 0.19125935435295105, 'timestamp': '2025-09-10 02:51:42.218778', 'step': 13368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:42.271576', 'step': 13368, 'epoch': 2} {'type': 'loss', 'content': 0.1087363064289093, 'timestamp': '2025-09-10 02:51:42.273704', 'step': 13369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:42.328422', 'step': 13369, 'epoch': 2} {'type': 'loss', 'content': 0.14532992243766785, 'timestamp': '2025-09-10 02:51:42.330769', 'step': 13370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:42.384616', 'step': 13370, 'epoch': 2} {'type': 'loss', 'content': 0.10602932423353195, 'timestamp': '2025-09-10 02:51:42.387003', 'step': 13371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:42.440240', 'step': 13371, 'epoch': 2} {'type': 'loss', 'content': 0.11770285665988922, 'timestamp': '2025-09-10 02:51:42.446075', 'step': 13372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:42.498861', 'step': 13372, 'epoch': 2} {'type': 'loss', 'content': 0.09121192246675491, 'timestamp': '2025-09-10 02:51:42.501071', 'step': 13373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:42.554382', 'step': 13373, 'epoch': 2} {'type': 'loss', 'content': 0.2516939043998718, 'timestamp': '2025-09-10 02:51:42.556382', 'step': 13374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:42.610662', 'step': 13374, 'epoch': 2} {'type': 'loss', 'content': 0.18710845708847046, 'timestamp': '2025-09-10 02:51:42.612894', 'step': 13375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:42.666841', 'step': 13375, 'epoch': 2} {'type': 'loss', 'content': 0.1050434336066246, 'timestamp': '2025-09-10 02:51:42.673365', 'step': 13376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:42.725411', 'step': 13376, 'epoch': 2} {'type': 'loss', 'content': 0.11602504551410675, 'timestamp': '2025-09-10 02:51:42.727751', 'step': 13377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:42.780715', 'step': 13377, 'epoch': 2} {'type': 'loss', 'content': 0.06731978803873062, 'timestamp': '2025-09-10 02:51:42.782992', 'step': 13378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:42.837658', 'step': 13378, 'epoch': 2} {'type': 'loss', 'content': 0.14729587733745575, 'timestamp': '2025-09-10 02:51:42.839963', 'step': 13379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:42.893648', 'step': 13379, 'epoch': 2} {'type': 'loss', 'content': 0.13940821588039398, 'timestamp': '2025-09-10 02:51:42.900277', 'step': 13380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:42.953273', 'step': 13380, 'epoch': 2} {'type': 'loss', 'content': 0.10232602804899216, 'timestamp': '2025-09-10 02:51:42.955746', 'step': 13381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:43.009117', 'step': 13381, 'epoch': 2} {'type': 'loss', 'content': 0.16711342334747314, 'timestamp': '2025-09-10 02:51:43.011489', 'step': 13382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:43.065587', 'step': 13382, 'epoch': 2} {'type': 'loss', 'content': 0.18815059959888458, 'timestamp': '2025-09-10 02:51:43.067928', 'step': 13383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:43.121299', 'step': 13383, 'epoch': 2} {'type': 'loss', 'content': 0.10634105652570724, 'timestamp': '2025-09-10 02:51:43.127755', 'step': 13384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:43.180564', 'step': 13384, 'epoch': 2} {'type': 'loss', 'content': 0.0542609840631485, 'timestamp': '2025-09-10 02:51:43.182995', 'step': 13385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:43.236271', 'step': 13385, 'epoch': 2} {'type': 'loss', 'content': 0.11913501471281052, 'timestamp': '2025-09-10 02:51:43.238551', 'step': 13386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:43.299659', 'step': 13386, 'epoch': 2} {'type': 'loss', 'content': 0.05011848732829094, 'timestamp': '2025-09-10 02:51:43.301924', 'step': 13387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:43.356064', 'step': 13387, 'epoch': 2} {'type': 'loss', 'content': 0.06822987645864487, 'timestamp': '2025-09-10 02:51:43.362517', 'step': 13388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:43.415968', 'step': 13388, 'epoch': 2} {'type': 'loss', 'content': 0.07433488219976425, 'timestamp': '2025-09-10 02:51:43.418291', 'step': 13389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:43.471927', 'step': 13389, 'epoch': 2} {'type': 'loss', 'content': 0.06554634124040604, 'timestamp': '2025-09-10 02:51:43.477635', 'step': 13390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:43.530997', 'step': 13390, 'epoch': 2} {'type': 'loss', 'content': 0.141210675239563, 'timestamp': '2025-09-10 02:51:43.533449', 'step': 13391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:43.586421', 'step': 13391, 'epoch': 2} {'type': 'loss', 'content': 0.147063747048378, 'timestamp': '2025-09-10 02:51:43.592666', 'step': 13392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:43.646853', 'step': 13392, 'epoch': 2} {'type': 'loss', 'content': 0.1921602189540863, 'timestamp': '2025-09-10 02:51:43.649183', 'step': 13393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:43.702635', 'step': 13393, 'epoch': 2} {'type': 'loss', 'content': 0.09367038309574127, 'timestamp': '2025-09-10 02:51:43.704957', 'step': 13394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:43.758126', 'step': 13394, 'epoch': 2} {'type': 'loss', 'content': 0.10301153361797333, 'timestamp': '2025-09-10 02:51:43.760604', 'step': 13395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:43.814752', 'step': 13395, 'epoch': 2} {'type': 'loss', 'content': 0.08402686566114426, 'timestamp': '2025-09-10 02:51:43.821135', 'step': 13396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:43.877227', 'step': 13396, 'epoch': 2} {'type': 'loss', 'content': 0.09328144788742065, 'timestamp': '2025-09-10 02:51:43.879546', 'step': 13397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:43.933797', 'step': 13397, 'epoch': 2} {'type': 'loss', 'content': 0.15198972821235657, 'timestamp': '2025-09-10 02:51:43.936187', 'step': 13398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:43.988972', 'step': 13398, 'epoch': 2} {'type': 'loss', 'content': 0.1275290846824646, 'timestamp': '2025-09-10 02:51:43.991190', 'step': 13399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:44.044962', 'step': 13399, 'epoch': 2} {'type': 'loss', 'content': 0.2628754675388336, 'timestamp': '2025-09-10 02:51:44.051204', 'step': 13400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:44.105595', 'step': 13400, 'epoch': 2} {'type': 'loss', 'content': 0.1485806405544281, 'timestamp': '2025-09-10 02:51:44.107952', 'step': 13401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:44.160949', 'step': 13401, 'epoch': 2} {'type': 'loss', 'content': 0.11406707763671875, 'timestamp': '2025-09-10 02:51:44.163363', 'step': 13402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:44.216979', 'step': 13402, 'epoch': 2} {'type': 'loss', 'content': 0.04028506204485893, 'timestamp': '2025-09-10 02:51:44.219349', 'step': 13403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:44.272678', 'step': 13403, 'epoch': 2} {'type': 'loss', 'content': 0.11761954426765442, 'timestamp': '2025-09-10 02:51:44.278804', 'step': 13404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:44.331235', 'step': 13404, 'epoch': 2} {'type': 'loss', 'content': 0.20339936017990112, 'timestamp': '2025-09-10 02:51:44.333528', 'step': 13405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:44.386464', 'step': 13405, 'epoch': 2} {'type': 'loss', 'content': 0.08463840186595917, 'timestamp': '2025-09-10 02:51:44.388699', 'step': 13406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:44.442964', 'step': 13406, 'epoch': 2} {'type': 'loss', 'content': 0.09709528833627701, 'timestamp': '2025-09-10 02:51:44.445270', 'step': 13407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:44.499556', 'step': 13407, 'epoch': 2} {'type': 'loss', 'content': 0.17488208413124084, 'timestamp': '2025-09-10 02:51:44.505833', 'step': 13408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:44.560153', 'step': 13408, 'epoch': 2} {'type': 'loss', 'content': 0.15709370374679565, 'timestamp': '2025-09-10 02:51:44.562640', 'step': 13409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:44.616296', 'step': 13409, 'epoch': 2} {'type': 'loss', 'content': 0.15680044889450073, 'timestamp': '2025-09-10 02:51:44.618678', 'step': 13410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:44.672164', 'step': 13410, 'epoch': 2} {'type': 'loss', 'content': 0.11172159016132355, 'timestamp': '2025-09-10 02:51:44.674452', 'step': 13411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:44.727079', 'step': 13411, 'epoch': 2} {'type': 'loss', 'content': 0.13161224126815796, 'timestamp': '2025-09-10 02:51:44.733401', 'step': 13412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:44.786283', 'step': 13412, 'epoch': 2} {'type': 'loss', 'content': 0.11846745014190674, 'timestamp': '2025-09-10 02:51:44.788483', 'step': 13413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:44.841874', 'step': 13413, 'epoch': 2} {'type': 'loss', 'content': 0.11275644600391388, 'timestamp': '2025-09-10 02:51:44.844205', 'step': 13414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:44.897543', 'step': 13414, 'epoch': 2} {'type': 'loss', 'content': 0.1048828512430191, 'timestamp': '2025-09-10 02:51:44.899908', 'step': 13415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:44.953782', 'step': 13415, 'epoch': 2} {'type': 'loss', 'content': 0.06515312194824219, 'timestamp': '2025-09-10 02:51:44.960042', 'step': 13416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:45.013750', 'step': 13416, 'epoch': 2} {'type': 'loss', 'content': 0.10913382470607758, 'timestamp': '2025-09-10 02:51:45.015925', 'step': 13417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:45.069070', 'step': 13417, 'epoch': 2} {'type': 'loss', 'content': 0.13202457129955292, 'timestamp': '2025-09-10 02:51:45.071423', 'step': 13418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:45.125957', 'step': 13418, 'epoch': 2} {'type': 'loss', 'content': 0.14088062942028046, 'timestamp': '2025-09-10 02:51:45.128403', 'step': 13419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:45.181996', 'step': 13419, 'epoch': 2} {'type': 'loss', 'content': 0.04628780856728554, 'timestamp': '2025-09-10 02:51:45.188355', 'step': 13420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:45.240975', 'step': 13420, 'epoch': 2} {'type': 'loss', 'content': 0.18482719361782074, 'timestamp': '2025-09-10 02:51:45.243309', 'step': 13421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:45.300207', 'step': 13421, 'epoch': 2} {'type': 'loss', 'content': 0.14406462013721466, 'timestamp': '2025-09-10 02:51:45.302448', 'step': 13422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:45.354791', 'step': 13422, 'epoch': 2} {'type': 'loss', 'content': 0.06937549263238907, 'timestamp': '2025-09-10 02:51:45.357128', 'step': 13423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:45.409960', 'step': 13423, 'epoch': 2} {'type': 'loss', 'content': 0.04889969155192375, 'timestamp': '2025-09-10 02:51:45.416290', 'step': 13424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:45.469764', 'step': 13424, 'epoch': 2} {'type': 'loss', 'content': 0.12082027643918991, 'timestamp': '2025-09-10 02:51:45.472086', 'step': 13425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:45.526812', 'step': 13425, 'epoch': 2} {'type': 'loss', 'content': 0.08040200918912888, 'timestamp': '2025-09-10 02:51:45.529153', 'step': 13426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:45.582398', 'step': 13426, 'epoch': 2} {'type': 'loss', 'content': 0.07265841215848923, 'timestamp': '2025-09-10 02:51:45.584742', 'step': 13427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:45.637844', 'step': 13427, 'epoch': 2} {'type': 'loss', 'content': 0.12808656692504883, 'timestamp': '2025-09-10 02:51:45.644026', 'step': 13428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:45.697418', 'step': 13428, 'epoch': 2} {'type': 'loss', 'content': 0.08831214159727097, 'timestamp': '2025-09-10 02:51:45.699522', 'step': 13429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:45.752740', 'step': 13429, 'epoch': 2} {'type': 'loss', 'content': 0.13555191457271576, 'timestamp': '2025-09-10 02:51:45.754805', 'step': 13430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:45.809699', 'step': 13430, 'epoch': 2} {'type': 'loss', 'content': 0.08052442967891693, 'timestamp': '2025-09-10 02:51:45.811786', 'step': 13431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:45.866526', 'step': 13431, 'epoch': 2} {'type': 'loss', 'content': 0.0981961116194725, 'timestamp': '2025-09-10 02:51:45.872614', 'step': 13432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:45.926659', 'step': 13432, 'epoch': 2} {'type': 'loss', 'content': 0.1276020109653473, 'timestamp': '2025-09-10 02:51:45.928883', 'step': 13433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:45.984034', 'step': 13433, 'epoch': 2} {'type': 'loss', 'content': 0.1029815524816513, 'timestamp': '2025-09-10 02:51:45.986569', 'step': 13434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:46.042664', 'step': 13434, 'epoch': 2} {'type': 'loss', 'content': 0.16197647154331207, 'timestamp': '2025-09-10 02:51:46.045139', 'step': 13435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:46.099136', 'step': 13435, 'epoch': 2} {'type': 'loss', 'content': 0.1206255778670311, 'timestamp': '2025-09-10 02:51:46.105360', 'step': 13436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:46.158006', 'step': 13436, 'epoch': 2} {'type': 'loss', 'content': 0.1206468790769577, 'timestamp': '2025-09-10 02:51:46.160284', 'step': 13437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:46.213745', 'step': 13437, 'epoch': 2} {'type': 'loss', 'content': 0.10655120760202408, 'timestamp': '2025-09-10 02:51:46.216190', 'step': 13438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:51:46.270209', 'step': 13438, 'epoch': 2} {'type': 'loss', 'content': 0.1936013400554657, 'timestamp': '2025-09-10 02:51:46.272646', 'step': 13439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:46.326295', 'step': 13439, 'epoch': 2} {'type': 'loss', 'content': 0.12044684588909149, 'timestamp': '2025-09-10 02:51:46.332348', 'step': 13440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:46.384993', 'step': 13440, 'epoch': 2} {'type': 'loss', 'content': 0.10380356013774872, 'timestamp': '2025-09-10 02:51:46.387227', 'step': 13441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:46.440848', 'step': 13441, 'epoch': 2} {'type': 'loss', 'content': 0.13031746447086334, 'timestamp': '2025-09-10 02:51:46.443106', 'step': 13442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:46.496572', 'step': 13442, 'epoch': 2} {'type': 'loss', 'content': 0.13686475157737732, 'timestamp': '2025-09-10 02:51:46.498841', 'step': 13443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:46.552190', 'step': 13443, 'epoch': 2} {'type': 'loss', 'content': 0.1798008382320404, 'timestamp': '2025-09-10 02:51:46.558632', 'step': 13444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:46.611762', 'step': 13444, 'epoch': 2} {'type': 'loss', 'content': 0.11368568986654282, 'timestamp': '2025-09-10 02:51:46.614088', 'step': 13445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:46.667443', 'step': 13445, 'epoch': 2} {'type': 'loss', 'content': 0.09684773534536362, 'timestamp': '2025-09-10 02:51:46.669781', 'step': 13446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:46.722439', 'step': 13446, 'epoch': 2} {'type': 'loss', 'content': 0.08795367926359177, 'timestamp': '2025-09-10 02:51:46.724716', 'step': 13447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:46.777614', 'step': 13447, 'epoch': 2} {'type': 'loss', 'content': 0.11465304344892502, 'timestamp': '2025-09-10 02:51:46.783596', 'step': 13448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:46.835955', 'step': 13448, 'epoch': 2} {'type': 'loss', 'content': 0.0775342732667923, 'timestamp': '2025-09-10 02:51:46.838197', 'step': 13449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:46.892620', 'step': 13449, 'epoch': 2} {'type': 'loss', 'content': 0.1348353773355484, 'timestamp': '2025-09-10 02:51:46.894899', 'step': 13450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:46.948138', 'step': 13450, 'epoch': 2} {'type': 'loss', 'content': 0.09265509992837906, 'timestamp': '2025-09-10 02:51:46.950528', 'step': 13451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:47.004232', 'step': 13451, 'epoch': 2} {'type': 'loss', 'content': 0.06371455639600754, 'timestamp': '2025-09-10 02:51:47.010300', 'step': 13452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:47.063352', 'step': 13452, 'epoch': 2} {'type': 'loss', 'content': 0.13156579434871674, 'timestamp': '2025-09-10 02:51:47.065773', 'step': 13453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:47.119228', 'step': 13453, 'epoch': 2} {'type': 'loss', 'content': 0.13816066086292267, 'timestamp': '2025-09-10 02:51:47.121566', 'step': 13454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:47.174770', 'step': 13454, 'epoch': 2} {'type': 'loss', 'content': 0.19686001539230347, 'timestamp': '2025-09-10 02:51:47.177135', 'step': 13455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:47.230181', 'step': 13455, 'epoch': 2} {'type': 'loss', 'content': 0.09937044978141785, 'timestamp': '2025-09-10 02:51:47.236292', 'step': 13456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:47.290002', 'step': 13456, 'epoch': 2} {'type': 'loss', 'content': 0.13802644610404968, 'timestamp': '2025-09-10 02:51:47.292280', 'step': 13457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:47.345662', 'step': 13457, 'epoch': 2} {'type': 'loss', 'content': 0.12230628728866577, 'timestamp': '2025-09-10 02:51:47.347986', 'step': 13458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:47.401097', 'step': 13458, 'epoch': 2} {'type': 'loss', 'content': 0.14363209903240204, 'timestamp': '2025-09-10 02:51:47.403477', 'step': 13459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:47.456878', 'step': 13459, 'epoch': 2} {'type': 'loss', 'content': 0.18867875635623932, 'timestamp': '2025-09-10 02:51:47.462839', 'step': 13460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:47.515498', 'step': 13460, 'epoch': 2} {'type': 'loss', 'content': 0.11018378287553787, 'timestamp': '2025-09-10 02:51:47.517953', 'step': 13461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:47.572158', 'step': 13461, 'epoch': 2} {'type': 'loss', 'content': 0.12540602684020996, 'timestamp': '2025-09-10 02:51:47.574512', 'step': 13462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:47.628478', 'step': 13462, 'epoch': 2} {'type': 'loss', 'content': 0.12699803709983826, 'timestamp': '2025-09-10 02:51:47.630598', 'step': 13463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:47.683646', 'step': 13463, 'epoch': 2} {'type': 'loss', 'content': 0.10987378656864166, 'timestamp': '2025-09-10 02:51:47.689579', 'step': 13464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:47.742472', 'step': 13464, 'epoch': 2} {'type': 'loss', 'content': 0.11498074978590012, 'timestamp': '2025-09-10 02:51:47.744710', 'step': 13465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:47.797475', 'step': 13465, 'epoch': 2} {'type': 'loss', 'content': 0.08090110868215561, 'timestamp': '2025-09-10 02:51:47.799875', 'step': 13466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:47.853258', 'step': 13466, 'epoch': 2} {'type': 'loss', 'content': 0.21436436474323273, 'timestamp': '2025-09-10 02:51:47.855581', 'step': 13467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:47.908866', 'step': 13467, 'epoch': 2} {'type': 'loss', 'content': 0.05281604826450348, 'timestamp': '2025-09-10 02:51:47.914761', 'step': 13468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:47.967996', 'step': 13468, 'epoch': 2} {'type': 'loss', 'content': 0.10132813453674316, 'timestamp': '2025-09-10 02:51:47.970358', 'step': 13469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:48.023759', 'step': 13469, 'epoch': 2} {'type': 'loss', 'content': 0.138201504945755, 'timestamp': '2025-09-10 02:51:48.026131', 'step': 13470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:48.079118', 'step': 13470, 'epoch': 2} {'type': 'loss', 'content': 0.17051802575588226, 'timestamp': '2025-09-10 02:51:48.081473', 'step': 13471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:48.134286', 'step': 13471, 'epoch': 2} {'type': 'loss', 'content': 0.060303304344415665, 'timestamp': '2025-09-10 02:51:48.140428', 'step': 13472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:48.193382', 'step': 13472, 'epoch': 2} {'type': 'loss', 'content': 0.17668619751930237, 'timestamp': '2025-09-10 02:51:48.195578', 'step': 13473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:48.248592', 'step': 13473, 'epoch': 2} {'type': 'loss', 'content': 0.15380336344242096, 'timestamp': '2025-09-10 02:51:48.250863', 'step': 13474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:48.305297', 'step': 13474, 'epoch': 2} {'type': 'loss', 'content': 0.08113663643598557, 'timestamp': '2025-09-10 02:51:48.308292', 'step': 13475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:48.361036', 'step': 13475, 'epoch': 2} {'type': 'loss', 'content': 0.18276257812976837, 'timestamp': '2025-09-10 02:51:48.367109', 'step': 13476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:48.420370', 'step': 13476, 'epoch': 2} {'type': 'loss', 'content': 0.12255042791366577, 'timestamp': '2025-09-10 02:51:48.422778', 'step': 13477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:48.478026', 'step': 13477, 'epoch': 2} {'type': 'loss', 'content': 0.06219921261072159, 'timestamp': '2025-09-10 02:51:48.480232', 'step': 13478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:48.534242', 'step': 13478, 'epoch': 2} {'type': 'loss', 'content': 0.04261787608265877, 'timestamp': '2025-09-10 02:51:48.536523', 'step': 13479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:48.590445', 'step': 13479, 'epoch': 2} {'type': 'loss', 'content': 0.1885993480682373, 'timestamp': '2025-09-10 02:51:48.596646', 'step': 13480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:48.649432', 'step': 13480, 'epoch': 2} {'type': 'loss', 'content': 0.14404433965682983, 'timestamp': '2025-09-10 02:51:48.651806', 'step': 13481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:48.705323', 'step': 13481, 'epoch': 2} {'type': 'loss', 'content': 0.08509465306997299, 'timestamp': '2025-09-10 02:51:48.707836', 'step': 13482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:48.760980', 'step': 13482, 'epoch': 2} {'type': 'loss', 'content': 0.07038000971078873, 'timestamp': '2025-09-10 02:51:48.763238', 'step': 13483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:48.818555', 'step': 13483, 'epoch': 2} {'type': 'loss', 'content': 0.13050754368305206, 'timestamp': '2025-09-10 02:51:48.824799', 'step': 13484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:48.877623', 'step': 13484, 'epoch': 2} {'type': 'loss', 'content': 0.10571525990962982, 'timestamp': '2025-09-10 02:51:48.879854', 'step': 13485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:48.933085', 'step': 13485, 'epoch': 2} {'type': 'loss', 'content': 0.11706918478012085, 'timestamp': '2025-09-10 02:51:48.935440', 'step': 13486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:48.988584', 'step': 13486, 'epoch': 2} {'type': 'loss', 'content': 0.057758182287216187, 'timestamp': '2025-09-10 02:51:48.990894', 'step': 13487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:49.044510', 'step': 13487, 'epoch': 2} {'type': 'loss', 'content': 0.12608492374420166, 'timestamp': '2025-09-10 02:51:49.050474', 'step': 13488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:49.103365', 'step': 13488, 'epoch': 2} {'type': 'loss', 'content': 0.15117256343364716, 'timestamp': '2025-09-10 02:51:49.105769', 'step': 13489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:49.160295', 'step': 13489, 'epoch': 2} {'type': 'loss', 'content': 0.09177916496992111, 'timestamp': '2025-09-10 02:51:49.162669', 'step': 13490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:49.217273', 'step': 13490, 'epoch': 2} {'type': 'loss', 'content': 0.06829582154750824, 'timestamp': '2025-09-10 02:51:49.219617', 'step': 13491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:49.272721', 'step': 13491, 'epoch': 2} {'type': 'loss', 'content': 0.1280325949192047, 'timestamp': '2025-09-10 02:51:49.278921', 'step': 13492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:49.331943', 'step': 13492, 'epoch': 2} {'type': 'loss', 'content': 0.124679334461689, 'timestamp': '2025-09-10 02:51:49.334213', 'step': 13493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:49.387341', 'step': 13493, 'epoch': 2} {'type': 'loss', 'content': 0.11977681517601013, 'timestamp': '2025-09-10 02:51:49.389698', 'step': 13494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:49.443146', 'step': 13494, 'epoch': 2} {'type': 'loss', 'content': 0.1630295366048813, 'timestamp': '2025-09-10 02:51:49.445687', 'step': 13495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:49.499768', 'step': 13495, 'epoch': 2} {'type': 'loss', 'content': 0.2101619690656662, 'timestamp': '2025-09-10 02:51:49.506199', 'step': 13496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:49.560050', 'step': 13496, 'epoch': 2} {'type': 'loss', 'content': 0.09329383075237274, 'timestamp': '2025-09-10 02:51:49.562424', 'step': 13497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:49.615693', 'step': 13497, 'epoch': 2} {'type': 'loss', 'content': 0.14045852422714233, 'timestamp': '2025-09-10 02:51:49.617980', 'step': 13498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:49.671331', 'step': 13498, 'epoch': 2} {'type': 'loss', 'content': 0.13782763481140137, 'timestamp': '2025-09-10 02:51:49.673627', 'step': 13499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:49.727952', 'step': 13499, 'epoch': 2} {'type': 'loss', 'content': 0.09633978456258774, 'timestamp': '2025-09-10 02:51:49.734200', 'step': 13500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 13500', 'timestamp': '2025-09-10 02:51:50.104550', 'step': 13500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:50.162842', 'step': 13500, 'epoch': 2} {'type': 'loss', 'content': 0.131021186709404, 'timestamp': '2025-09-10 02:51:50.165203', 'step': 13501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:50.227001', 'step': 13501, 'epoch': 2} {'type': 'loss', 'content': 0.10148302465677261, 'timestamp': '2025-09-10 02:51:50.229339', 'step': 13502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:50.285809', 'step': 13502, 'epoch': 2} {'type': 'loss', 'content': 0.12857364118099213, 'timestamp': '2025-09-10 02:51:50.288203', 'step': 13503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:50.345705', 'step': 13503, 'epoch': 2} {'type': 'loss', 'content': 0.08428756892681122, 'timestamp': '2025-09-10 02:51:50.352338', 'step': 13504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:50.407928', 'step': 13504, 'epoch': 2} {'type': 'loss', 'content': 0.11294129490852356, 'timestamp': '2025-09-10 02:51:50.410298', 'step': 13505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:50.464431', 'step': 13505, 'epoch': 2} {'type': 'loss', 'content': 0.10749958455562592, 'timestamp': '2025-09-10 02:51:50.466734', 'step': 13506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:50.524353', 'step': 13506, 'epoch': 2} {'type': 'loss', 'content': 0.05218842625617981, 'timestamp': '2025-09-10 02:51:50.527948', 'step': 13507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:50.582102', 'step': 13507, 'epoch': 2} {'type': 'loss', 'content': 0.10596788674592972, 'timestamp': '2025-09-10 02:51:50.588603', 'step': 13508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:50.641469', 'step': 13508, 'epoch': 2} {'type': 'loss', 'content': 0.07236216962337494, 'timestamp': '2025-09-10 02:51:50.643850', 'step': 13509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:51:50.697138', 'step': 13509, 'epoch': 2} {'type': 'loss', 'content': 0.11527448147535324, 'timestamp': '2025-09-10 02:51:50.699637', 'step': 13510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:50.752758', 'step': 13510, 'epoch': 2} {'type': 'loss', 'content': 0.20378512144088745, 'timestamp': '2025-09-10 02:51:50.755089', 'step': 13511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:51:50.808493', 'step': 13511, 'epoch': 2} {'type': 'loss', 'content': 0.07304877042770386, 'timestamp': '2025-09-10 02:51:50.814524', 'step': 13512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:50.867187', 'step': 13512, 'epoch': 2} {'type': 'loss', 'content': 0.06963085383176804, 'timestamp': '2025-09-10 02:51:50.869461', 'step': 13513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:51:50.923462', 'step': 13513, 'epoch': 2} {'type': 'loss', 'content': 0.0763116180896759, 'timestamp': '2025-09-10 02:51:50.925728', 'step': 13514, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:52:03.931936', 'step': 13514, 'epoch': 2} {'type': 'pplx', 'content': 13793.210352405898, 'timestamp': '2025-09-10 02:52:03.934885', 'step': 13514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:03.996675', 'step': 13514, 'epoch': 2} {'type': 'loss', 'content': 0.1338411420583725, 'timestamp': '2025-09-10 02:52:03.998728', 'step': 13515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:04.055255', 'step': 13515, 'epoch': 2} {'type': 'loss', 'content': 0.13434644043445587, 'timestamp': '2025-09-10 02:52:04.061324', 'step': 13516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:04.115206', 'step': 13516, 'epoch': 2} {'type': 'loss', 'content': 0.08735765516757965, 'timestamp': '2025-09-10 02:52:04.118422', 'step': 13517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:04.176587', 'step': 13517, 'epoch': 2} {'type': 'loss', 'content': 0.12107767909765244, 'timestamp': '2025-09-10 02:52:04.178804', 'step': 13518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:04.234564', 'step': 13518, 'epoch': 2} {'type': 'loss', 'content': 0.2554320991039276, 'timestamp': '2025-09-10 02:52:04.236688', 'step': 13519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:04.293323', 'step': 13519, 'epoch': 2} {'type': 'loss', 'content': 0.12213601171970367, 'timestamp': '2025-09-10 02:52:04.299422', 'step': 13520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:04.358187', 'step': 13520, 'epoch': 2} {'type': 'loss', 'content': 0.12202037125825882, 'timestamp': '2025-09-10 02:52:04.360200', 'step': 13521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:04.418442', 'step': 13521, 'epoch': 2} {'type': 'loss', 'content': 0.1503712236881256, 'timestamp': '2025-09-10 02:52:04.420391', 'step': 13522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:04.476807', 'step': 13522, 'epoch': 2} {'type': 'loss', 'content': 0.053082000464200974, 'timestamp': '2025-09-10 02:52:04.478852', 'step': 13523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:04.534029', 'step': 13523, 'epoch': 2} {'type': 'loss', 'content': 0.06781181693077087, 'timestamp': '2025-09-10 02:52:04.540171', 'step': 13524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:04.595789', 'step': 13524, 'epoch': 2} {'type': 'loss', 'content': 0.16714084148406982, 'timestamp': '2025-09-10 02:52:04.597769', 'step': 13525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:04.652830', 'step': 13525, 'epoch': 2} {'type': 'loss', 'content': 0.04482026398181915, 'timestamp': '2025-09-10 02:52:04.654819', 'step': 13526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:04.711975', 'step': 13526, 'epoch': 2} {'type': 'loss', 'content': 0.1391826570034027, 'timestamp': '2025-09-10 02:52:04.713908', 'step': 13527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:04.768649', 'step': 13527, 'epoch': 2} {'type': 'loss', 'content': 0.09124578535556793, 'timestamp': '2025-09-10 02:52:04.774610', 'step': 13528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:04.831314', 'step': 13528, 'epoch': 2} {'type': 'loss', 'content': 0.1159331351518631, 'timestamp': '2025-09-10 02:52:04.833319', 'step': 13529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:04.886953', 'step': 13529, 'epoch': 2} {'type': 'loss', 'content': 0.10068972408771515, 'timestamp': '2025-09-10 02:52:04.889000', 'step': 13530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:04.942593', 'step': 13530, 'epoch': 2} {'type': 'loss', 'content': 0.14121316373348236, 'timestamp': '2025-09-10 02:52:04.944562', 'step': 13531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:05.000388', 'step': 13531, 'epoch': 2} {'type': 'loss', 'content': 0.08152469992637634, 'timestamp': '2025-09-10 02:52:05.006753', 'step': 13532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:05.062320', 'step': 13532, 'epoch': 2} {'type': 'loss', 'content': 0.10216455161571503, 'timestamp': '2025-09-10 02:52:05.064425', 'step': 13533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:05.119572', 'step': 13533, 'epoch': 2} {'type': 'loss', 'content': 0.09984730184078217, 'timestamp': '2025-09-10 02:52:05.121509', 'step': 13534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:05.177125', 'step': 13534, 'epoch': 2} {'type': 'loss', 'content': 0.10547848045825958, 'timestamp': '2025-09-10 02:52:05.179102', 'step': 13535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:05.236748', 'step': 13535, 'epoch': 2} {'type': 'loss', 'content': 0.14014872908592224, 'timestamp': '2025-09-10 02:52:05.242899', 'step': 13536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:05.297930', 'step': 13536, 'epoch': 2} {'type': 'loss', 'content': 0.046707168221473694, 'timestamp': '2025-09-10 02:52:05.299912', 'step': 13537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:05.355518', 'step': 13537, 'epoch': 2} {'type': 'loss', 'content': 0.07901077717542648, 'timestamp': '2025-09-10 02:52:05.357476', 'step': 13538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:05.412460', 'step': 13538, 'epoch': 2} {'type': 'loss', 'content': 0.12178768217563629, 'timestamp': '2025-09-10 02:52:05.414483', 'step': 13539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:05.469899', 'step': 13539, 'epoch': 2} {'type': 'loss', 'content': 0.1368386596441269, 'timestamp': '2025-09-10 02:52:05.476061', 'step': 13540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:05.531244', 'step': 13540, 'epoch': 2} {'type': 'loss', 'content': 0.20098580420017242, 'timestamp': '2025-09-10 02:52:05.533378', 'step': 13541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:05.590072', 'step': 13541, 'epoch': 2} {'type': 'loss', 'content': 0.06538931280374527, 'timestamp': '2025-09-10 02:52:05.592164', 'step': 13542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:05.648421', 'step': 13542, 'epoch': 2} {'type': 'loss', 'content': 0.12023235857486725, 'timestamp': '2025-09-10 02:52:05.650474', 'step': 13543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:05.707206', 'step': 13543, 'epoch': 2} {'type': 'loss', 'content': 0.08866611868143082, 'timestamp': '2025-09-10 02:52:05.713317', 'step': 13544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:05.775643', 'step': 13544, 'epoch': 2} {'type': 'loss', 'content': 0.1688425987958908, 'timestamp': '2025-09-10 02:52:05.777723', 'step': 13545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:05.832077', 'step': 13545, 'epoch': 2} {'type': 'loss', 'content': 0.17009237408638, 'timestamp': '2025-09-10 02:52:05.834229', 'step': 13546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:05.890577', 'step': 13546, 'epoch': 2} {'type': 'loss', 'content': 0.22402675449848175, 'timestamp': '2025-09-10 02:52:05.892799', 'step': 13547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:05.950408', 'step': 13547, 'epoch': 2} {'type': 'loss', 'content': 0.16636578738689423, 'timestamp': '2025-09-10 02:52:05.956447', 'step': 13548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:06.011982', 'step': 13548, 'epoch': 2} {'type': 'loss', 'content': 0.05959725007414818, 'timestamp': '2025-09-10 02:52:06.014115', 'step': 13549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:06.069858', 'step': 13549, 'epoch': 2} {'type': 'loss', 'content': 0.12870049476623535, 'timestamp': '2025-09-10 02:52:06.072379', 'step': 13550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:06.128334', 'step': 13550, 'epoch': 2} {'type': 'loss', 'content': 0.12499841302633286, 'timestamp': '2025-09-10 02:52:06.130269', 'step': 13551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:06.186756', 'step': 13551, 'epoch': 2} {'type': 'loss', 'content': 0.13292543590068817, 'timestamp': '2025-09-10 02:52:06.192896', 'step': 13552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:06.248454', 'step': 13552, 'epoch': 2} {'type': 'loss', 'content': 0.11331775784492493, 'timestamp': '2025-09-10 02:52:06.250430', 'step': 13553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:06.304776', 'step': 13553, 'epoch': 2} {'type': 'loss', 'content': 0.12694193422794342, 'timestamp': '2025-09-10 02:52:06.306795', 'step': 13554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:06.373479', 'step': 13554, 'epoch': 2} {'type': 'loss', 'content': 0.1702229231595993, 'timestamp': '2025-09-10 02:52:06.375506', 'step': 13555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:06.430539', 'step': 13555, 'epoch': 2} {'type': 'loss', 'content': 0.20420776307582855, 'timestamp': '2025-09-10 02:52:06.436586', 'step': 13556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:06.490968', 'step': 13556, 'epoch': 2} {'type': 'loss', 'content': 0.1146782711148262, 'timestamp': '2025-09-10 02:52:06.493526', 'step': 13557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:06.549251', 'step': 13557, 'epoch': 2} {'type': 'loss', 'content': 0.0932050347328186, 'timestamp': '2025-09-10 02:52:06.551254', 'step': 13558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:06.607029', 'step': 13558, 'epoch': 2} {'type': 'loss', 'content': 0.09956340491771698, 'timestamp': '2025-09-10 02:52:06.609036', 'step': 13559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:06.665746', 'step': 13559, 'epoch': 2} {'type': 'loss', 'content': 0.084995336830616, 'timestamp': '2025-09-10 02:52:06.671893', 'step': 13560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:06.726982', 'step': 13560, 'epoch': 2} {'type': 'loss', 'content': 0.11362822353839874, 'timestamp': '2025-09-10 02:52:06.729067', 'step': 13561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:06.784252', 'step': 13561, 'epoch': 2} {'type': 'loss', 'content': 0.060516197234392166, 'timestamp': '2025-09-10 02:52:06.786444', 'step': 13562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:06.845768', 'step': 13562, 'epoch': 2} {'type': 'loss', 'content': 0.12450974434614182, 'timestamp': '2025-09-10 02:52:06.847791', 'step': 13563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:06.909886', 'step': 13563, 'epoch': 2} {'type': 'loss', 'content': 0.12646827101707458, 'timestamp': '2025-09-10 02:52:06.915986', 'step': 13564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:06.970264', 'step': 13564, 'epoch': 2} {'type': 'loss', 'content': 0.12715482711791992, 'timestamp': '2025-09-10 02:52:06.973463', 'step': 13565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:07.030593', 'step': 13565, 'epoch': 2} {'type': 'loss', 'content': 0.10230117291212082, 'timestamp': '2025-09-10 02:52:07.032558', 'step': 13566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:07.089717', 'step': 13566, 'epoch': 2} {'type': 'loss', 'content': 0.19206713140010834, 'timestamp': '2025-09-10 02:52:07.091747', 'step': 13567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:07.149250', 'step': 13567, 'epoch': 2} {'type': 'loss', 'content': 0.1350853592157364, 'timestamp': '2025-09-10 02:52:07.159478', 'step': 13568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:07.218455', 'step': 13568, 'epoch': 2} {'type': 'loss', 'content': 0.1405465304851532, 'timestamp': '2025-09-10 02:52:07.220396', 'step': 13569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:07.276346', 'step': 13569, 'epoch': 2} {'type': 'loss', 'content': 0.0977153480052948, 'timestamp': '2025-09-10 02:52:07.278447', 'step': 13570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:07.333435', 'step': 13570, 'epoch': 2} {'type': 'loss', 'content': 0.12433352321386337, 'timestamp': '2025-09-10 02:52:07.335396', 'step': 13571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:07.393083', 'step': 13571, 'epoch': 2} {'type': 'loss', 'content': 0.14429353177547455, 'timestamp': '2025-09-10 02:52:07.399650', 'step': 13572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:07.464867', 'step': 13572, 'epoch': 2} {'type': 'loss', 'content': 0.126216858625412, 'timestamp': '2025-09-10 02:52:07.466851', 'step': 13573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:07.522452', 'step': 13573, 'epoch': 2} {'type': 'loss', 'content': 0.12355078011751175, 'timestamp': '2025-09-10 02:52:07.527138', 'step': 13574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:07.590830', 'step': 13574, 'epoch': 2} {'type': 'loss', 'content': 0.07272747904062271, 'timestamp': '2025-09-10 02:52:07.592933', 'step': 13575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:07.650006', 'step': 13575, 'epoch': 2} {'type': 'loss', 'content': 0.12717124819755554, 'timestamp': '2025-09-10 02:52:07.660202', 'step': 13576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:07.719727', 'step': 13576, 'epoch': 2} {'type': 'loss', 'content': 0.13369734585285187, 'timestamp': '2025-09-10 02:52:07.721774', 'step': 13577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:07.781350', 'step': 13577, 'epoch': 2} {'type': 'loss', 'content': 0.09028662741184235, 'timestamp': '2025-09-10 02:52:07.783531', 'step': 13578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:07.839858', 'step': 13578, 'epoch': 2} {'type': 'loss', 'content': 0.08873675763607025, 'timestamp': '2025-09-10 02:52:07.841837', 'step': 13579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:07.898887', 'step': 13579, 'epoch': 2} {'type': 'loss', 'content': 0.16113786399364471, 'timestamp': '2025-09-10 02:52:07.905155', 'step': 13580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:07.963267', 'step': 13580, 'epoch': 2} {'type': 'loss', 'content': 0.08987194299697876, 'timestamp': '2025-09-10 02:52:07.965269', 'step': 13581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:08.020193', 'step': 13581, 'epoch': 2} {'type': 'loss', 'content': 0.15220557153224945, 'timestamp': '2025-09-10 02:52:08.022197', 'step': 13582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:08.079602', 'step': 13582, 'epoch': 2} {'type': 'loss', 'content': 0.1561097949743271, 'timestamp': '2025-09-10 02:52:08.081647', 'step': 13583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:08.135767', 'step': 13583, 'epoch': 2} {'type': 'loss', 'content': 0.07986725121736526, 'timestamp': '2025-09-10 02:52:08.142098', 'step': 13584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:08.196494', 'step': 13584, 'epoch': 2} {'type': 'loss', 'content': 0.18259257078170776, 'timestamp': '2025-09-10 02:52:08.198459', 'step': 13585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:08.255016', 'step': 13585, 'epoch': 2} {'type': 'loss', 'content': 0.15046286582946777, 'timestamp': '2025-09-10 02:52:08.256989', 'step': 13586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:08.312014', 'step': 13586, 'epoch': 2} {'type': 'loss', 'content': 0.07980138808488846, 'timestamp': '2025-09-10 02:52:08.314032', 'step': 13587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:08.383652', 'step': 13587, 'epoch': 2} {'type': 'loss', 'content': 0.13503775000572205, 'timestamp': '2025-09-10 02:52:08.389776', 'step': 13588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:08.444508', 'step': 13588, 'epoch': 2} {'type': 'loss', 'content': 0.08807618170976639, 'timestamp': '2025-09-10 02:52:08.446559', 'step': 13589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:08.504824', 'step': 13589, 'epoch': 2} {'type': 'loss', 'content': 0.12677592039108276, 'timestamp': '2025-09-10 02:52:08.507102', 'step': 13590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:08.563002', 'step': 13590, 'epoch': 2} {'type': 'loss', 'content': 0.0893714502453804, 'timestamp': '2025-09-10 02:52:08.565036', 'step': 13591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:08.622226', 'step': 13591, 'epoch': 2} {'type': 'loss', 'content': 0.23805947601795197, 'timestamp': '2025-09-10 02:52:08.628312', 'step': 13592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:08.683071', 'step': 13592, 'epoch': 2} {'type': 'loss', 'content': 0.1098380833864212, 'timestamp': '2025-09-10 02:52:08.685086', 'step': 13593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:08.742211', 'step': 13593, 'epoch': 2} {'type': 'loss', 'content': 0.07846556603908539, 'timestamp': '2025-09-10 02:52:08.744328', 'step': 13594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:08.800495', 'step': 13594, 'epoch': 2} {'type': 'loss', 'content': 0.16652809083461761, 'timestamp': '2025-09-10 02:52:08.802511', 'step': 13595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:08.857755', 'step': 13595, 'epoch': 2} {'type': 'loss', 'content': 0.10783068835735321, 'timestamp': '2025-09-10 02:52:08.863878', 'step': 13596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:08.919721', 'step': 13596, 'epoch': 2} {'type': 'loss', 'content': 0.12268424779176712, 'timestamp': '2025-09-10 02:52:08.921661', 'step': 13597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:08.977913', 'step': 13597, 'epoch': 2} {'type': 'loss', 'content': 0.21106697618961334, 'timestamp': '2025-09-10 02:52:08.979837', 'step': 13598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:09.038036', 'step': 13598, 'epoch': 2} {'type': 'loss', 'content': 0.1791009157896042, 'timestamp': '2025-09-10 02:52:09.040234', 'step': 13599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:09.096192', 'step': 13599, 'epoch': 2} {'type': 'loss', 'content': 0.09228239953517914, 'timestamp': '2025-09-10 02:52:09.102073', 'step': 13600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:09.156035', 'step': 13600, 'epoch': 2} {'type': 'loss', 'content': 0.09107844531536102, 'timestamp': '2025-09-10 02:52:09.158151', 'step': 13601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:09.212993', 'step': 13601, 'epoch': 2} {'type': 'loss', 'content': 0.1925254464149475, 'timestamp': '2025-09-10 02:52:09.215250', 'step': 13602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:09.272009', 'step': 13602, 'epoch': 2} {'type': 'loss', 'content': 0.14775581657886505, 'timestamp': '2025-09-10 02:52:09.274058', 'step': 13603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:09.328964', 'step': 13603, 'epoch': 2} {'type': 'loss', 'content': 0.0759752169251442, 'timestamp': '2025-09-10 02:52:09.335122', 'step': 13604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:09.388684', 'step': 13604, 'epoch': 2} {'type': 'loss', 'content': 0.17078103125095367, 'timestamp': '2025-09-10 02:52:09.390865', 'step': 13605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:09.446932', 'step': 13605, 'epoch': 2} {'type': 'loss', 'content': 0.09339134395122528, 'timestamp': '2025-09-10 02:52:09.448887', 'step': 13606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:09.503687', 'step': 13606, 'epoch': 2} {'type': 'loss', 'content': 0.13511286675930023, 'timestamp': '2025-09-10 02:52:09.505775', 'step': 13607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:09.560064', 'step': 13607, 'epoch': 2} {'type': 'loss', 'content': 0.15053777396678925, 'timestamp': '2025-09-10 02:52:09.566058', 'step': 13608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:09.621842', 'step': 13608, 'epoch': 2} {'type': 'loss', 'content': 0.12637996673583984, 'timestamp': '2025-09-10 02:52:09.623724', 'step': 13609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:09.677177', 'step': 13609, 'epoch': 2} {'type': 'loss', 'content': 0.14619165658950806, 'timestamp': '2025-09-10 02:52:09.679102', 'step': 13610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:09.735416', 'step': 13610, 'epoch': 2} {'type': 'loss', 'content': 0.14425407350063324, 'timestamp': '2025-09-10 02:52:09.737413', 'step': 13611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:09.795211', 'step': 13611, 'epoch': 2} {'type': 'loss', 'content': 0.09858264029026031, 'timestamp': '2025-09-10 02:52:09.801644', 'step': 13612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:09.856865', 'step': 13612, 'epoch': 2} {'type': 'loss', 'content': 0.19851075112819672, 'timestamp': '2025-09-10 02:52:09.858917', 'step': 13613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:09.914556', 'step': 13613, 'epoch': 2} {'type': 'loss', 'content': 0.08253157138824463, 'timestamp': '2025-09-10 02:52:09.916552', 'step': 13614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:09.972691', 'step': 13614, 'epoch': 2} {'type': 'loss', 'content': 0.13902944326400757, 'timestamp': '2025-09-10 02:52:09.974760', 'step': 13615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:10.030820', 'step': 13615, 'epoch': 2} {'type': 'loss', 'content': 0.19778254628181458, 'timestamp': '2025-09-10 02:52:10.036900', 'step': 13616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:10.090352', 'step': 13616, 'epoch': 2} {'type': 'loss', 'content': 0.13898944854736328, 'timestamp': '2025-09-10 02:52:10.092453', 'step': 13617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:10.146408', 'step': 13617, 'epoch': 2} {'type': 'loss', 'content': 0.24233737587928772, 'timestamp': '2025-09-10 02:52:10.148557', 'step': 13618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:10.203948', 'step': 13618, 'epoch': 2} {'type': 'loss', 'content': 0.147347092628479, 'timestamp': '2025-09-10 02:52:10.206160', 'step': 13619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:10.262339', 'step': 13619, 'epoch': 2} {'type': 'loss', 'content': 0.06785831600427628, 'timestamp': '2025-09-10 02:52:10.268449', 'step': 13620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:10.323727', 'step': 13620, 'epoch': 2} {'type': 'loss', 'content': 0.07023907452821732, 'timestamp': '2025-09-10 02:52:10.325795', 'step': 13621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:10.389271', 'step': 13621, 'epoch': 2} {'type': 'loss', 'content': 0.123411163687706, 'timestamp': '2025-09-10 02:52:10.391001', 'step': 13622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:10.445725', 'step': 13622, 'epoch': 2} {'type': 'loss', 'content': 0.0688362866640091, 'timestamp': '2025-09-10 02:52:10.447887', 'step': 13623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:10.503543', 'step': 13623, 'epoch': 2} {'type': 'loss', 'content': 0.12543989717960358, 'timestamp': '2025-09-10 02:52:10.509350', 'step': 13624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:10.562533', 'step': 13624, 'epoch': 2} {'type': 'loss', 'content': 0.09107748419046402, 'timestamp': '2025-09-10 02:52:10.564292', 'step': 13625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:10.617652', 'step': 13625, 'epoch': 2} {'type': 'loss', 'content': 0.07095633447170258, 'timestamp': '2025-09-10 02:52:10.619987', 'step': 13626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:10.674010', 'step': 13626, 'epoch': 2} {'type': 'loss', 'content': 0.12796853482723236, 'timestamp': '2025-09-10 02:52:10.676209', 'step': 13627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:10.730960', 'step': 13627, 'epoch': 2} {'type': 'loss', 'content': 0.11162964254617691, 'timestamp': '2025-09-10 02:52:10.737352', 'step': 13628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:10.792423', 'step': 13628, 'epoch': 2} {'type': 'loss', 'content': 0.12410958111286163, 'timestamp': '2025-09-10 02:52:10.794573', 'step': 13629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:10.849101', 'step': 13629, 'epoch': 2} {'type': 'loss', 'content': 0.08563701063394547, 'timestamp': '2025-09-10 02:52:10.851462', 'step': 13630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:10.909775', 'step': 13630, 'epoch': 2} {'type': 'loss', 'content': 0.13512323796749115, 'timestamp': '2025-09-10 02:52:10.911930', 'step': 13631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:10.968197', 'step': 13631, 'epoch': 2} {'type': 'loss', 'content': 0.09591531753540039, 'timestamp': '2025-09-10 02:52:10.974549', 'step': 13632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:11.027623', 'step': 13632, 'epoch': 2} {'type': 'loss', 'content': 0.06402555108070374, 'timestamp': '2025-09-10 02:52:11.029686', 'step': 13633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:11.083731', 'step': 13633, 'epoch': 2} {'type': 'loss', 'content': 0.18026778101921082, 'timestamp': '2025-09-10 02:52:11.085843', 'step': 13634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:11.140915', 'step': 13634, 'epoch': 2} {'type': 'loss', 'content': 0.10738351941108704, 'timestamp': '2025-09-10 02:52:11.142897', 'step': 13635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:11.197115', 'step': 13635, 'epoch': 2} {'type': 'loss', 'content': 0.11451733112335205, 'timestamp': '2025-09-10 02:52:11.203070', 'step': 13636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:11.258997', 'step': 13636, 'epoch': 2} {'type': 'loss', 'content': 0.14735205471515656, 'timestamp': '2025-09-10 02:52:11.261016', 'step': 13637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:11.316415', 'step': 13637, 'epoch': 2} {'type': 'loss', 'content': 0.2040187418460846, 'timestamp': '2025-09-10 02:52:11.318579', 'step': 13638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:11.372860', 'step': 13638, 'epoch': 2} {'type': 'loss', 'content': 0.062221501022577286, 'timestamp': '2025-09-10 02:52:11.375195', 'step': 13639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:11.429516', 'step': 13639, 'epoch': 2} {'type': 'loss', 'content': 0.1615997552871704, 'timestamp': '2025-09-10 02:52:11.435623', 'step': 13640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:11.488451', 'step': 13640, 'epoch': 2} {'type': 'loss', 'content': 0.08787700533866882, 'timestamp': '2025-09-10 02:52:11.490597', 'step': 13641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:11.544909', 'step': 13641, 'epoch': 2} {'type': 'loss', 'content': 0.09581174701452255, 'timestamp': '2025-09-10 02:52:11.546877', 'step': 13642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:11.601255', 'step': 13642, 'epoch': 2} {'type': 'loss', 'content': 0.15898697078227997, 'timestamp': '2025-09-10 02:52:11.603413', 'step': 13643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:11.658138', 'step': 13643, 'epoch': 2} {'type': 'loss', 'content': 0.13811995089054108, 'timestamp': '2025-09-10 02:52:11.664484', 'step': 13644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:11.718373', 'step': 13644, 'epoch': 2} {'type': 'loss', 'content': 0.0908321812748909, 'timestamp': '2025-09-10 02:52:11.720479', 'step': 13645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:11.774930', 'step': 13645, 'epoch': 2} {'type': 'loss', 'content': 0.09594494849443436, 'timestamp': '2025-09-10 02:52:11.776897', 'step': 13646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:11.832903', 'step': 13646, 'epoch': 2} {'type': 'loss', 'content': 0.1461700201034546, 'timestamp': '2025-09-10 02:52:11.835221', 'step': 13647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:11.890911', 'step': 13647, 'epoch': 2} {'type': 'loss', 'content': 0.08090706914663315, 'timestamp': '2025-09-10 02:52:11.897488', 'step': 13648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:11.954004', 'step': 13648, 'epoch': 2} {'type': 'loss', 'content': 0.042671263217926025, 'timestamp': '2025-09-10 02:52:11.955979', 'step': 13649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:12.012134', 'step': 13649, 'epoch': 2} {'type': 'loss', 'content': 0.16436506807804108, 'timestamp': '2025-09-10 02:52:12.014116', 'step': 13650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:12.070626', 'step': 13650, 'epoch': 2} {'type': 'loss', 'content': 0.08714298158884048, 'timestamp': '2025-09-10 02:52:12.072627', 'step': 13651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:12.129778', 'step': 13651, 'epoch': 2} {'type': 'loss', 'content': 0.23655961453914642, 'timestamp': '2025-09-10 02:52:12.135942', 'step': 13652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:12.190743', 'step': 13652, 'epoch': 2} {'type': 'loss', 'content': 0.15996617078781128, 'timestamp': '2025-09-10 02:52:12.192692', 'step': 13653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:12.247902', 'step': 13653, 'epoch': 2} {'type': 'loss', 'content': 0.1814408302307129, 'timestamp': '2025-09-10 02:52:12.250026', 'step': 13654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:12.305198', 'step': 13654, 'epoch': 2} {'type': 'loss', 'content': 0.1290634423494339, 'timestamp': '2025-09-10 02:52:12.307396', 'step': 13655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:12.363877', 'step': 13655, 'epoch': 2} {'type': 'loss', 'content': 0.06671919673681259, 'timestamp': '2025-09-10 02:52:12.370031', 'step': 13656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:12.425228', 'step': 13656, 'epoch': 2} {'type': 'loss', 'content': 0.13981978595256805, 'timestamp': '2025-09-10 02:52:12.427379', 'step': 13657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:12.489087', 'step': 13657, 'epoch': 2} {'type': 'loss', 'content': 0.046109918504953384, 'timestamp': '2025-09-10 02:52:12.491322', 'step': 13658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:12.547763', 'step': 13658, 'epoch': 2} {'type': 'loss', 'content': 0.11080844700336456, 'timestamp': '2025-09-10 02:52:12.549870', 'step': 13659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:12.605253', 'step': 13659, 'epoch': 2} {'type': 'loss', 'content': 0.08618790656328201, 'timestamp': '2025-09-10 02:52:12.611356', 'step': 13660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:12.665476', 'step': 13660, 'epoch': 2} {'type': 'loss', 'content': 0.20104622840881348, 'timestamp': '2025-09-10 02:52:12.667623', 'step': 13661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:12.722608', 'step': 13661, 'epoch': 2} {'type': 'loss', 'content': 0.10798836499452591, 'timestamp': '2025-09-10 02:52:12.724902', 'step': 13662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:12.779989', 'step': 13662, 'epoch': 2} {'type': 'loss', 'content': 0.13118574023246765, 'timestamp': '2025-09-10 02:52:12.782167', 'step': 13663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:12.837235', 'step': 13663, 'epoch': 2} {'type': 'loss', 'content': 0.13649430871009827, 'timestamp': '2025-09-10 02:52:12.843304', 'step': 13664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:12.898491', 'step': 13664, 'epoch': 2} {'type': 'loss', 'content': 0.10060476511716843, 'timestamp': '2025-09-10 02:52:12.900601', 'step': 13665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:12.955623', 'step': 13665, 'epoch': 2} {'type': 'loss', 'content': 0.21365855634212494, 'timestamp': '2025-09-10 02:52:12.957756', 'step': 13666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:13.013827', 'step': 13666, 'epoch': 2} {'type': 'loss', 'content': 0.08336219191551208, 'timestamp': '2025-09-10 02:52:13.016017', 'step': 13667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:13.072072', 'step': 13667, 'epoch': 2} {'type': 'loss', 'content': 0.11199058592319489, 'timestamp': '2025-09-10 02:52:13.078304', 'step': 13668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:13.134317', 'step': 13668, 'epoch': 2} {'type': 'loss', 'content': 0.14839181303977966, 'timestamp': '2025-09-10 02:52:13.136657', 'step': 13669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:13.191285', 'step': 13669, 'epoch': 2} {'type': 'loss', 'content': 0.14107465744018555, 'timestamp': '2025-09-10 02:52:13.193555', 'step': 13670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:13.248830', 'step': 13670, 'epoch': 2} {'type': 'loss', 'content': 0.1647101491689682, 'timestamp': '2025-09-10 02:52:13.251079', 'step': 13671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:13.305581', 'step': 13671, 'epoch': 2} {'type': 'loss', 'content': 0.140824556350708, 'timestamp': '2025-09-10 02:52:13.311850', 'step': 13672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:13.366831', 'step': 13672, 'epoch': 2} {'type': 'loss', 'content': 0.1577814668416977, 'timestamp': '2025-09-10 02:52:13.369308', 'step': 13673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:13.424318', 'step': 13673, 'epoch': 2} {'type': 'loss', 'content': 0.11838927865028381, 'timestamp': '2025-09-10 02:52:13.426587', 'step': 13674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:13.481420', 'step': 13674, 'epoch': 2} {'type': 'loss', 'content': 0.06457765400409698, 'timestamp': '2025-09-10 02:52:13.483837', 'step': 13675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:13.539116', 'step': 13675, 'epoch': 2} {'type': 'loss', 'content': 0.11277826130390167, 'timestamp': '2025-09-10 02:52:13.545520', 'step': 13676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:13.601802', 'step': 13676, 'epoch': 2} {'type': 'loss', 'content': 0.11049658805131912, 'timestamp': '2025-09-10 02:52:13.604117', 'step': 13677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:13.658391', 'step': 13677, 'epoch': 2} {'type': 'loss', 'content': 0.1949562281370163, 'timestamp': '2025-09-10 02:52:13.660461', 'step': 13678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:13.715097', 'step': 13678, 'epoch': 2} {'type': 'loss', 'content': 0.21431632339954376, 'timestamp': '2025-09-10 02:52:13.717305', 'step': 13679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:13.777746', 'step': 13679, 'epoch': 2} {'type': 'loss', 'content': 0.11975221335887909, 'timestamp': '2025-09-10 02:52:13.783989', 'step': 13680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:13.840427', 'step': 13680, 'epoch': 2} {'type': 'loss', 'content': 0.11217179149389267, 'timestamp': '2025-09-10 02:52:13.842592', 'step': 13681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:13.896855', 'step': 13681, 'epoch': 2} {'type': 'loss', 'content': 0.13729900121688843, 'timestamp': '2025-09-10 02:52:13.899024', 'step': 13682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:13.954024', 'step': 13682, 'epoch': 2} {'type': 'loss', 'content': 0.07234909385442734, 'timestamp': '2025-09-10 02:52:13.956021', 'step': 13683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:14.011481', 'step': 13683, 'epoch': 2} {'type': 'loss', 'content': 0.09417724609375, 'timestamp': '2025-09-10 02:52:14.017887', 'step': 13684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:14.073184', 'step': 13684, 'epoch': 2} {'type': 'loss', 'content': 0.07419483363628387, 'timestamp': '2025-09-10 02:52:14.075289', 'step': 13685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:14.130636', 'step': 13685, 'epoch': 2} {'type': 'loss', 'content': 0.1050749197602272, 'timestamp': '2025-09-10 02:52:14.132798', 'step': 13686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:14.187535', 'step': 13686, 'epoch': 2} {'type': 'loss', 'content': 0.14262840151786804, 'timestamp': '2025-09-10 02:52:14.189846', 'step': 13687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:14.244590', 'step': 13687, 'epoch': 2} {'type': 'loss', 'content': 0.21231403946876526, 'timestamp': '2025-09-10 02:52:14.250750', 'step': 13688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:14.304976', 'step': 13688, 'epoch': 2} {'type': 'loss', 'content': 0.09403008222579956, 'timestamp': '2025-09-10 02:52:14.307114', 'step': 13689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:14.361446', 'step': 13689, 'epoch': 2} {'type': 'loss', 'content': 0.1330004632472992, 'timestamp': '2025-09-10 02:52:14.363494', 'step': 13690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:14.417695', 'step': 13690, 'epoch': 2} {'type': 'loss', 'content': 0.09755327552556992, 'timestamp': '2025-09-10 02:52:14.425466', 'step': 13691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:14.485904', 'step': 13691, 'epoch': 2} {'type': 'loss', 'content': 0.22534716129302979, 'timestamp': '2025-09-10 02:52:14.495205', 'step': 13692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:14.548551', 'step': 13692, 'epoch': 2} {'type': 'loss', 'content': 0.10008237510919571, 'timestamp': '2025-09-10 02:52:14.551376', 'step': 13693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:14.605157', 'step': 13693, 'epoch': 2} {'type': 'loss', 'content': 0.15192914009094238, 'timestamp': '2025-09-10 02:52:14.607348', 'step': 13694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:14.662107', 'step': 13694, 'epoch': 2} {'type': 'loss', 'content': 0.1381687968969345, 'timestamp': '2025-09-10 02:52:14.664009', 'step': 13695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:14.728379', 'step': 13695, 'epoch': 2} {'type': 'loss', 'content': 0.1159360483288765, 'timestamp': '2025-09-10 02:52:14.734585', 'step': 13696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:14.790822', 'step': 13696, 'epoch': 2} {'type': 'loss', 'content': 0.11026689410209656, 'timestamp': '2025-09-10 02:52:14.793010', 'step': 13697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:14.846987', 'step': 13697, 'epoch': 2} {'type': 'loss', 'content': 0.07866309583187103, 'timestamp': '2025-09-10 02:52:14.849185', 'step': 13698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:14.903536', 'step': 13698, 'epoch': 2} {'type': 'loss', 'content': 0.178519144654274, 'timestamp': '2025-09-10 02:52:14.905765', 'step': 13699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:14.960926', 'step': 13699, 'epoch': 2} {'type': 'loss', 'content': 0.15450552105903625, 'timestamp': '2025-09-10 02:52:14.967444', 'step': 13700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:15.021739', 'step': 13700, 'epoch': 2} {'type': 'loss', 'content': 0.11423462629318237, 'timestamp': '2025-09-10 02:52:15.026081', 'step': 13701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:15.082167', 'step': 13701, 'epoch': 2} {'type': 'loss', 'content': 0.20722496509552002, 'timestamp': '2025-09-10 02:52:15.084442', 'step': 13702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:15.139891', 'step': 13702, 'epoch': 2} {'type': 'loss', 'content': 0.10819549858570099, 'timestamp': '2025-09-10 02:52:15.141891', 'step': 13703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:15.197216', 'step': 13703, 'epoch': 2} {'type': 'loss', 'content': 0.17013974487781525, 'timestamp': '2025-09-10 02:52:15.203600', 'step': 13704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:15.258022', 'step': 13704, 'epoch': 2} {'type': 'loss', 'content': 0.041515231132507324, 'timestamp': '2025-09-10 02:52:15.260178', 'step': 13705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:15.315251', 'step': 13705, 'epoch': 2} {'type': 'loss', 'content': 0.07104694843292236, 'timestamp': '2025-09-10 02:52:15.317441', 'step': 13706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:15.372838', 'step': 13706, 'epoch': 2} {'type': 'loss', 'content': 0.1533404141664505, 'timestamp': '2025-09-10 02:52:15.375110', 'step': 13707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:15.429644', 'step': 13707, 'epoch': 2} {'type': 'loss', 'content': 0.07445656508207321, 'timestamp': '2025-09-10 02:52:15.435696', 'step': 13708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:15.490513', 'step': 13708, 'epoch': 2} {'type': 'loss', 'content': 0.09451259672641754, 'timestamp': '2025-09-10 02:52:15.492599', 'step': 13709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:15.548470', 'step': 13709, 'epoch': 2} {'type': 'loss', 'content': 0.17047001421451569, 'timestamp': '2025-09-10 02:52:15.550692', 'step': 13710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:15.605472', 'step': 13710, 'epoch': 2} {'type': 'loss', 'content': 0.10822393745183945, 'timestamp': '2025-09-10 02:52:15.607697', 'step': 13711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:15.662474', 'step': 13711, 'epoch': 2} {'type': 'loss', 'content': 0.09806475788354874, 'timestamp': '2025-09-10 02:52:15.668713', 'step': 13712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:15.723387', 'step': 13712, 'epoch': 2} {'type': 'loss', 'content': 0.05999451130628586, 'timestamp': '2025-09-10 02:52:15.731858', 'step': 13713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:15.796241', 'step': 13713, 'epoch': 2} {'type': 'loss', 'content': 0.20127977430820465, 'timestamp': '2025-09-10 02:52:15.803139', 'step': 13714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:15.867946', 'step': 13714, 'epoch': 2} {'type': 'loss', 'content': 0.17708277702331543, 'timestamp': '2025-09-10 02:52:15.877050', 'step': 13715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:15.936679', 'step': 13715, 'epoch': 2} {'type': 'loss', 'content': 0.09755042940378189, 'timestamp': '2025-09-10 02:52:15.942663', 'step': 13716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:15.998805', 'step': 13716, 'epoch': 2} {'type': 'loss', 'content': 0.1110633984208107, 'timestamp': '2025-09-10 02:52:16.000905', 'step': 13717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:16.055196', 'step': 13717, 'epoch': 2} {'type': 'loss', 'content': 0.10306266695261002, 'timestamp': '2025-09-10 02:52:16.057250', 'step': 13718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:16.114258', 'step': 13718, 'epoch': 2} {'type': 'loss', 'content': 0.13631030917167664, 'timestamp': '2025-09-10 02:52:16.116750', 'step': 13719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:16.171413', 'step': 13719, 'epoch': 2} {'type': 'loss', 'content': 0.11773169785737991, 'timestamp': '2025-09-10 02:52:16.177930', 'step': 13720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:16.234173', 'step': 13720, 'epoch': 2} {'type': 'loss', 'content': 0.04910428449511528, 'timestamp': '2025-09-10 02:52:16.236310', 'step': 13721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:16.291098', 'step': 13721, 'epoch': 2} {'type': 'loss', 'content': 0.05049845948815346, 'timestamp': '2025-09-10 02:52:16.293206', 'step': 13722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:16.347545', 'step': 13722, 'epoch': 2} {'type': 'loss', 'content': 0.13325268030166626, 'timestamp': '2025-09-10 02:52:16.349728', 'step': 13723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:16.406537', 'step': 13723, 'epoch': 2} {'type': 'loss', 'content': 0.17503662407398224, 'timestamp': '2025-09-10 02:52:16.412645', 'step': 13724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:16.465745', 'step': 13724, 'epoch': 2} {'type': 'loss', 'content': 0.1081690788269043, 'timestamp': '2025-09-10 02:52:16.467957', 'step': 13725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:16.521126', 'step': 13725, 'epoch': 2} {'type': 'loss', 'content': 0.16078028082847595, 'timestamp': '2025-09-10 02:52:16.523231', 'step': 13726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:16.577333', 'step': 13726, 'epoch': 2} {'type': 'loss', 'content': 0.10518088191747665, 'timestamp': '2025-09-10 02:52:16.579494', 'step': 13727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:16.633266', 'step': 13727, 'epoch': 2} {'type': 'loss', 'content': 0.1486022025346756, 'timestamp': '2025-09-10 02:52:16.639470', 'step': 13728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:16.692687', 'step': 13728, 'epoch': 2} {'type': 'loss', 'content': 0.1199619323015213, 'timestamp': '2025-09-10 02:52:16.694944', 'step': 13729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:16.749402', 'step': 13729, 'epoch': 2} {'type': 'loss', 'content': 0.14505811035633087, 'timestamp': '2025-09-10 02:52:16.751511', 'step': 13730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:16.807865', 'step': 13730, 'epoch': 2} {'type': 'loss', 'content': 0.11503949761390686, 'timestamp': '2025-09-10 02:52:16.809997', 'step': 13731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:16.864400', 'step': 13731, 'epoch': 2} {'type': 'loss', 'content': 0.11741811782121658, 'timestamp': '2025-09-10 02:52:16.870504', 'step': 13732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:16.924434', 'step': 13732, 'epoch': 2} {'type': 'loss', 'content': 0.08681592345237732, 'timestamp': '2025-09-10 02:52:16.926704', 'step': 13733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:16.981147', 'step': 13733, 'epoch': 2} {'type': 'loss', 'content': 0.14282295107841492, 'timestamp': '2025-09-10 02:52:16.983480', 'step': 13734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:17.038312', 'step': 13734, 'epoch': 2} {'type': 'loss', 'content': 0.09098756313323975, 'timestamp': '2025-09-10 02:52:17.040470', 'step': 13735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:17.096263', 'step': 13735, 'epoch': 2} {'type': 'loss', 'content': 0.22617515921592712, 'timestamp': '2025-09-10 02:52:17.102586', 'step': 13736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:17.156523', 'step': 13736, 'epoch': 2} {'type': 'loss', 'content': 0.0633484497666359, 'timestamp': '2025-09-10 02:52:17.158689', 'step': 13737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:17.212987', 'step': 13737, 'epoch': 2} {'type': 'loss', 'content': 0.18286846578121185, 'timestamp': '2025-09-10 02:52:17.215024', 'step': 13738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:17.270050', 'step': 13738, 'epoch': 2} {'type': 'loss', 'content': 0.12075258046388626, 'timestamp': '2025-09-10 02:52:17.272379', 'step': 13739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:17.328097', 'step': 13739, 'epoch': 2} {'type': 'loss', 'content': 0.09976161271333694, 'timestamp': '2025-09-10 02:52:17.334192', 'step': 13740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:17.387669', 'step': 13740, 'epoch': 2} {'type': 'loss', 'content': 0.14398856461048126, 'timestamp': '2025-09-10 02:52:17.389868', 'step': 13741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:17.444802', 'step': 13741, 'epoch': 2} {'type': 'loss', 'content': 0.059162259101867676, 'timestamp': '2025-09-10 02:52:17.447029', 'step': 13742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:17.501595', 'step': 13742, 'epoch': 2} {'type': 'loss', 'content': 0.23090451955795288, 'timestamp': '2025-09-10 02:52:17.503937', 'step': 13743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:17.558290', 'step': 13743, 'epoch': 2} {'type': 'loss', 'content': 0.1193685531616211, 'timestamp': '2025-09-10 02:52:17.564196', 'step': 13744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:17.618821', 'step': 13744, 'epoch': 2} {'type': 'loss', 'content': 0.13176853954792023, 'timestamp': '2025-09-10 02:52:17.621022', 'step': 13745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:17.674907', 'step': 13745, 'epoch': 2} {'type': 'loss', 'content': 0.1311720609664917, 'timestamp': '2025-09-10 02:52:17.677243', 'step': 13746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:17.731818', 'step': 13746, 'epoch': 2} {'type': 'loss', 'content': 0.13237625360488892, 'timestamp': '2025-09-10 02:52:17.734266', 'step': 13747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:17.788365', 'step': 13747, 'epoch': 2} {'type': 'loss', 'content': 0.17973795533180237, 'timestamp': '2025-09-10 02:52:17.794506', 'step': 13748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:17.849392', 'step': 13748, 'epoch': 2} {'type': 'loss', 'content': 0.14707615971565247, 'timestamp': '2025-09-10 02:52:17.851586', 'step': 13749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:17.906921', 'step': 13749, 'epoch': 2} {'type': 'loss', 'content': 0.09814326465129852, 'timestamp': '2025-09-10 02:52:17.909130', 'step': 13750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:17.964130', 'step': 13750, 'epoch': 2} {'type': 'loss', 'content': 0.1824275106191635, 'timestamp': '2025-09-10 02:52:17.966146', 'step': 13751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:18.020900', 'step': 13751, 'epoch': 2} {'type': 'loss', 'content': 0.16756302118301392, 'timestamp': '2025-09-10 02:52:18.027166', 'step': 13752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:18.081244', 'step': 13752, 'epoch': 2} {'type': 'loss', 'content': 0.08379650115966797, 'timestamp': '2025-09-10 02:52:18.083556', 'step': 13753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:18.137754', 'step': 13753, 'epoch': 2} {'type': 'loss', 'content': 0.2823646664619446, 'timestamp': '2025-09-10 02:52:18.140086', 'step': 13754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:18.194760', 'step': 13754, 'epoch': 2} {'type': 'loss', 'content': 0.19481737911701202, 'timestamp': '2025-09-10 02:52:18.197310', 'step': 13755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:18.251827', 'step': 13755, 'epoch': 2} {'type': 'loss', 'content': 0.0704004317522049, 'timestamp': '2025-09-10 02:52:18.257913', 'step': 13756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:18.311927', 'step': 13756, 'epoch': 2} {'type': 'loss', 'content': 0.09178785234689713, 'timestamp': '2025-09-10 02:52:18.314094', 'step': 13757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:18.368005', 'step': 13757, 'epoch': 2} {'type': 'loss', 'content': 0.13545872271060944, 'timestamp': '2025-09-10 02:52:18.369970', 'step': 13758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:18.424480', 'step': 13758, 'epoch': 2} {'type': 'loss', 'content': 0.10423587262630463, 'timestamp': '2025-09-10 02:52:18.426636', 'step': 13759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:18.481942', 'step': 13759, 'epoch': 2} {'type': 'loss', 'content': 0.17560678720474243, 'timestamp': '2025-09-10 02:52:18.488011', 'step': 13760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:18.543864', 'step': 13760, 'epoch': 2} {'type': 'loss', 'content': 0.13094305992126465, 'timestamp': '2025-09-10 02:52:18.546042', 'step': 13761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:18.601761', 'step': 13761, 'epoch': 2} {'type': 'loss', 'content': 0.0901319682598114, 'timestamp': '2025-09-10 02:52:18.604026', 'step': 13762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:18.658787', 'step': 13762, 'epoch': 2} {'type': 'loss', 'content': 0.07843758910894394, 'timestamp': '2025-09-10 02:52:18.661042', 'step': 13763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:18.714823', 'step': 13763, 'epoch': 2} {'type': 'loss', 'content': 0.1085769534111023, 'timestamp': '2025-09-10 02:52:18.720978', 'step': 13764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:18.774444', 'step': 13764, 'epoch': 2} {'type': 'loss', 'content': 0.11229559034109116, 'timestamp': '2025-09-10 02:52:18.776536', 'step': 13765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:18.830514', 'step': 13765, 'epoch': 2} {'type': 'loss', 'content': 0.13290439546108246, 'timestamp': '2025-09-10 02:52:18.832685', 'step': 13766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:18.887211', 'step': 13766, 'epoch': 2} {'type': 'loss', 'content': 0.1876659244298935, 'timestamp': '2025-09-10 02:52:18.889459', 'step': 13767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:18.944384', 'step': 13767, 'epoch': 2} {'type': 'loss', 'content': 0.0439240001142025, 'timestamp': '2025-09-10 02:52:18.950568', 'step': 13768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:19.004107', 'step': 13768, 'epoch': 2} {'type': 'loss', 'content': 0.065900519490242, 'timestamp': '2025-09-10 02:52:19.006191', 'step': 13769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:19.059859', 'step': 13769, 'epoch': 2} {'type': 'loss', 'content': 0.10942761600017548, 'timestamp': '2025-09-10 02:52:19.061953', 'step': 13770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:19.116340', 'step': 13770, 'epoch': 2} {'type': 'loss', 'content': 0.10752411186695099, 'timestamp': '2025-09-10 02:52:19.118523', 'step': 13771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:19.173089', 'step': 13771, 'epoch': 2} {'type': 'loss', 'content': 0.04551170393824577, 'timestamp': '2025-09-10 02:52:19.179151', 'step': 13772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:19.234576', 'step': 13772, 'epoch': 2} {'type': 'loss', 'content': 0.1362193524837494, 'timestamp': '2025-09-10 02:52:19.236736', 'step': 13773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:19.290555', 'step': 13773, 'epoch': 2} {'type': 'loss', 'content': 0.12081625312566757, 'timestamp': '2025-09-10 02:52:19.292714', 'step': 13774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:19.346276', 'step': 13774, 'epoch': 2} {'type': 'loss', 'content': 0.20496699213981628, 'timestamp': '2025-09-10 02:52:19.348660', 'step': 13775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:19.405547', 'step': 13775, 'epoch': 2} {'type': 'loss', 'content': 0.16849565505981445, 'timestamp': '2025-09-10 02:52:19.411248', 'step': 13776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:19.465328', 'step': 13776, 'epoch': 2} {'type': 'loss', 'content': 0.20578962564468384, 'timestamp': '2025-09-10 02:52:19.467197', 'step': 13777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:19.521445', 'step': 13777, 'epoch': 2} {'type': 'loss', 'content': 0.27569887042045593, 'timestamp': '2025-09-10 02:52:19.523226', 'step': 13778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:19.578675', 'step': 13778, 'epoch': 2} {'type': 'loss', 'content': 0.22369682788848877, 'timestamp': '2025-09-10 02:52:19.580878', 'step': 13779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:19.635614', 'step': 13779, 'epoch': 2} {'type': 'loss', 'content': 0.1069089025259018, 'timestamp': '2025-09-10 02:52:19.641926', 'step': 13780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:19.696879', 'step': 13780, 'epoch': 2} {'type': 'loss', 'content': 0.14777684211730957, 'timestamp': '2025-09-10 02:52:19.699006', 'step': 13781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:19.753820', 'step': 13781, 'epoch': 2} {'type': 'loss', 'content': 0.20332232117652893, 'timestamp': '2025-09-10 02:52:19.756092', 'step': 13782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:19.812083', 'step': 13782, 'epoch': 2} {'type': 'loss', 'content': 0.10836286842823029, 'timestamp': '2025-09-10 02:52:19.814228', 'step': 13783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:19.868078', 'step': 13783, 'epoch': 2} {'type': 'loss', 'content': 0.09081888198852539, 'timestamp': '2025-09-10 02:52:19.874053', 'step': 13784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:19.927808', 'step': 13784, 'epoch': 2} {'type': 'loss', 'content': 0.12371253222227097, 'timestamp': '2025-09-10 02:52:19.929976', 'step': 13785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:19.984828', 'step': 13785, 'epoch': 2} {'type': 'loss', 'content': 0.10856857895851135, 'timestamp': '2025-09-10 02:52:19.987078', 'step': 13786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:20.042324', 'step': 13786, 'epoch': 2} {'type': 'loss', 'content': 0.07954481989145279, 'timestamp': '2025-09-10 02:52:20.044595', 'step': 13787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:20.098966', 'step': 13787, 'epoch': 2} {'type': 'loss', 'content': 0.08838269859552383, 'timestamp': '2025-09-10 02:52:20.105408', 'step': 13788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:20.159356', 'step': 13788, 'epoch': 2} {'type': 'loss', 'content': 0.09981362521648407, 'timestamp': '2025-09-10 02:52:20.161627', 'step': 13789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:20.215587', 'step': 13789, 'epoch': 2} {'type': 'loss', 'content': 0.10590867698192596, 'timestamp': '2025-09-10 02:52:20.218060', 'step': 13790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:20.272743', 'step': 13790, 'epoch': 2} {'type': 'loss', 'content': 0.1477958858013153, 'timestamp': '2025-09-10 02:52:20.275240', 'step': 13791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:20.329402', 'step': 13791, 'epoch': 2} {'type': 'loss', 'content': 0.10622017830610275, 'timestamp': '2025-09-10 02:52:20.335525', 'step': 13792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:20.389504', 'step': 13792, 'epoch': 2} {'type': 'loss', 'content': 0.06703738868236542, 'timestamp': '2025-09-10 02:52:20.391689', 'step': 13793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:20.445916', 'step': 13793, 'epoch': 2} {'type': 'loss', 'content': 0.1422126591205597, 'timestamp': '2025-09-10 02:52:20.448139', 'step': 13794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:20.503090', 'step': 13794, 'epoch': 2} {'type': 'loss', 'content': 0.10011481493711472, 'timestamp': '2025-09-10 02:52:20.505335', 'step': 13795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:20.560193', 'step': 13795, 'epoch': 2} {'type': 'loss', 'content': 0.16547885537147522, 'timestamp': '2025-09-10 02:52:20.566592', 'step': 13796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:20.621422', 'step': 13796, 'epoch': 2} {'type': 'loss', 'content': 0.11304078996181488, 'timestamp': '2025-09-10 02:52:20.623719', 'step': 13797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:20.678145', 'step': 13797, 'epoch': 2} {'type': 'loss', 'content': 0.11922334134578705, 'timestamp': '2025-09-10 02:52:20.680291', 'step': 13798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:20.735527', 'step': 13798, 'epoch': 2} {'type': 'loss', 'content': 0.13121303915977478, 'timestamp': '2025-09-10 02:52:20.737556', 'step': 13799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:20.792161', 'step': 13799, 'epoch': 2} {'type': 'loss', 'content': 0.11220380663871765, 'timestamp': '2025-09-10 02:52:20.798260', 'step': 13800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:20.851144', 'step': 13800, 'epoch': 2} {'type': 'loss', 'content': 0.11493317037820816, 'timestamp': '2025-09-10 02:52:20.852945', 'step': 13801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:20.906570', 'step': 13801, 'epoch': 2} {'type': 'loss', 'content': 0.14456593990325928, 'timestamp': '2025-09-10 02:52:20.908779', 'step': 13802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:20.964831', 'step': 13802, 'epoch': 2} {'type': 'loss', 'content': 0.18192532658576965, 'timestamp': '2025-09-10 02:52:20.966854', 'step': 13803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:21.022549', 'step': 13803, 'epoch': 2} {'type': 'loss', 'content': 0.08661895990371704, 'timestamp': '2025-09-10 02:52:21.029090', 'step': 13804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:21.083894', 'step': 13804, 'epoch': 2} {'type': 'loss', 'content': 0.17791670560836792, 'timestamp': '2025-09-10 02:52:21.086087', 'step': 13805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:21.143107', 'step': 13805, 'epoch': 2} {'type': 'loss', 'content': 0.07652626931667328, 'timestamp': '2025-09-10 02:52:21.145389', 'step': 13806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:21.200627', 'step': 13806, 'epoch': 2} {'type': 'loss', 'content': 0.10450248420238495, 'timestamp': '2025-09-10 02:52:21.202820', 'step': 13807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:21.256999', 'step': 13807, 'epoch': 2} {'type': 'loss', 'content': 0.08898764103651047, 'timestamp': '2025-09-10 02:52:21.263121', 'step': 13808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:21.316336', 'step': 13808, 'epoch': 2} {'type': 'loss', 'content': 0.09487216919660568, 'timestamp': '2025-09-10 02:52:21.318610', 'step': 13809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:21.372614', 'step': 13809, 'epoch': 2} {'type': 'loss', 'content': 0.1368996500968933, 'timestamp': '2025-09-10 02:52:21.374672', 'step': 13810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:21.428716', 'step': 13810, 'epoch': 2} {'type': 'loss', 'content': 0.0801248699426651, 'timestamp': '2025-09-10 02:52:21.430631', 'step': 13811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:21.485862', 'step': 13811, 'epoch': 2} {'type': 'loss', 'content': 0.09477809071540833, 'timestamp': '2025-09-10 02:52:21.491987', 'step': 13812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:21.547204', 'step': 13812, 'epoch': 2} {'type': 'loss', 'content': 0.11536939442157745, 'timestamp': '2025-09-10 02:52:21.549357', 'step': 13813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:21.603492', 'step': 13813, 'epoch': 2} {'type': 'loss', 'content': 0.12019004672765732, 'timestamp': '2025-09-10 02:52:21.605602', 'step': 13814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:21.661071', 'step': 13814, 'epoch': 2} {'type': 'loss', 'content': 0.18123826384544373, 'timestamp': '2025-09-10 02:52:21.662771', 'step': 13815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:21.716027', 'step': 13815, 'epoch': 2} {'type': 'loss', 'content': 0.14749325811862946, 'timestamp': '2025-09-10 02:52:21.722105', 'step': 13816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:21.775846', 'step': 13816, 'epoch': 2} {'type': 'loss', 'content': 0.10383641719818115, 'timestamp': '2025-09-10 02:52:21.777940', 'step': 13817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:21.832496', 'step': 13817, 'epoch': 2} {'type': 'loss', 'content': 0.15316210687160492, 'timestamp': '2025-09-10 02:52:21.834809', 'step': 13818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:21.890830', 'step': 13818, 'epoch': 2} {'type': 'loss', 'content': 0.10737784206867218, 'timestamp': '2025-09-10 02:52:21.893149', 'step': 13819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:21.950049', 'step': 13819, 'epoch': 2} {'type': 'loss', 'content': 0.2086721658706665, 'timestamp': '2025-09-10 02:52:21.956541', 'step': 13820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:22.010783', 'step': 13820, 'epoch': 2} {'type': 'loss', 'content': 0.07189948111772537, 'timestamp': '2025-09-10 02:52:22.012844', 'step': 13821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:22.067061', 'step': 13821, 'epoch': 2} {'type': 'loss', 'content': 0.10633138567209244, 'timestamp': '2025-09-10 02:52:22.069310', 'step': 13822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:22.123798', 'step': 13822, 'epoch': 2} {'type': 'loss', 'content': 0.12214989960193634, 'timestamp': '2025-09-10 02:52:22.125984', 'step': 13823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:22.180693', 'step': 13823, 'epoch': 2} {'type': 'loss', 'content': 0.19161391258239746, 'timestamp': '2025-09-10 02:52:22.186976', 'step': 13824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:22.241227', 'step': 13824, 'epoch': 2} {'type': 'loss', 'content': 0.09438420832157135, 'timestamp': '2025-09-10 02:52:22.243335', 'step': 13825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:22.297519', 'step': 13825, 'epoch': 2} {'type': 'loss', 'content': 0.08847177773714066, 'timestamp': '2025-09-10 02:52:22.300980', 'step': 13826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:22.358054', 'step': 13826, 'epoch': 2} {'type': 'loss', 'content': 0.08973924815654755, 'timestamp': '2025-09-10 02:52:22.360326', 'step': 13827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:22.419126', 'step': 13827, 'epoch': 2} {'type': 'loss', 'content': 0.09902311861515045, 'timestamp': '2025-09-10 02:52:22.425244', 'step': 13828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:22.478330', 'step': 13828, 'epoch': 2} {'type': 'loss', 'content': 0.13024786114692688, 'timestamp': '2025-09-10 02:52:22.480515', 'step': 13829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:22.533501', 'step': 13829, 'epoch': 2} {'type': 'loss', 'content': 0.15438510477542877, 'timestamp': '2025-09-10 02:52:22.535681', 'step': 13830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:22.589853', 'step': 13830, 'epoch': 2} {'type': 'loss', 'content': 0.12010136991739273, 'timestamp': '2025-09-10 02:52:22.591774', 'step': 13831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:22.646316', 'step': 13831, 'epoch': 2} {'type': 'loss', 'content': 0.06694864481687546, 'timestamp': '2025-09-10 02:52:22.652462', 'step': 13832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:22.706180', 'step': 13832, 'epoch': 2} {'type': 'loss', 'content': 0.12010467797517776, 'timestamp': '2025-09-10 02:52:22.708490', 'step': 13833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:22.763684', 'step': 13833, 'epoch': 2} {'type': 'loss', 'content': 0.06701592355966568, 'timestamp': '2025-09-10 02:52:22.766168', 'step': 13834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:22.819842', 'step': 13834, 'epoch': 2} {'type': 'loss', 'content': 0.10490608960390091, 'timestamp': '2025-09-10 02:52:22.821982', 'step': 13835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:22.877189', 'step': 13835, 'epoch': 2} {'type': 'loss', 'content': 0.13947820663452148, 'timestamp': '2025-09-10 02:52:22.883644', 'step': 13836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:22.938819', 'step': 13836, 'epoch': 2} {'type': 'loss', 'content': 0.11689180880784988, 'timestamp': '2025-09-10 02:52:22.941096', 'step': 13837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:22.996237', 'step': 13837, 'epoch': 2} {'type': 'loss', 'content': 0.09110765159130096, 'timestamp': '2025-09-10 02:52:22.998414', 'step': 13838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:23.052400', 'step': 13838, 'epoch': 2} {'type': 'loss', 'content': 0.11352170258760452, 'timestamp': '2025-09-10 02:52:23.054533', 'step': 13839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:23.108740', 'step': 13839, 'epoch': 2} {'type': 'loss', 'content': 0.1688983142375946, 'timestamp': '2025-09-10 02:52:23.114874', 'step': 13840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:23.167883', 'step': 13840, 'epoch': 2} {'type': 'loss', 'content': 0.07167316973209381, 'timestamp': '2025-09-10 02:52:23.170135', 'step': 13841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:23.223776', 'step': 13841, 'epoch': 2} {'type': 'loss', 'content': 0.1083025336265564, 'timestamp': '2025-09-10 02:52:23.225947', 'step': 13842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:23.280062', 'step': 13842, 'epoch': 2} {'type': 'loss', 'content': 0.07872845977544785, 'timestamp': '2025-09-10 02:52:23.282160', 'step': 13843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:23.335756', 'step': 13843, 'epoch': 2} {'type': 'loss', 'content': 0.07975746691226959, 'timestamp': '2025-09-10 02:52:23.341865', 'step': 13844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:23.395571', 'step': 13844, 'epoch': 2} {'type': 'loss', 'content': 0.13443152606487274, 'timestamp': '2025-09-10 02:52:23.397420', 'step': 13845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:23.450678', 'step': 13845, 'epoch': 2} {'type': 'loss', 'content': 0.1759341061115265, 'timestamp': '2025-09-10 02:52:23.452427', 'step': 13846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:23.506176', 'step': 13846, 'epoch': 2} {'type': 'loss', 'content': 0.17929904162883759, 'timestamp': '2025-09-10 02:52:23.508401', 'step': 13847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:23.563112', 'step': 13847, 'epoch': 2} {'type': 'loss', 'content': 0.19768427312374115, 'timestamp': '2025-09-10 02:52:23.569691', 'step': 13848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:23.628208', 'step': 13848, 'epoch': 2} {'type': 'loss', 'content': 0.12021429091691971, 'timestamp': '2025-09-10 02:52:23.630745', 'step': 13849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:23.689838', 'step': 13849, 'epoch': 2} {'type': 'loss', 'content': 0.10416898876428604, 'timestamp': '2025-09-10 02:52:23.692140', 'step': 13850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:23.750115', 'step': 13850, 'epoch': 2} {'type': 'loss', 'content': 0.09467560797929764, 'timestamp': '2025-09-10 02:52:23.752153', 'step': 13851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:23.808903', 'step': 13851, 'epoch': 2} {'type': 'loss', 'content': 0.19380824267864227, 'timestamp': '2025-09-10 02:52:23.815432', 'step': 13852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:23.872314', 'step': 13852, 'epoch': 2} {'type': 'loss', 'content': 0.10584491491317749, 'timestamp': '2025-09-10 02:52:23.874319', 'step': 13853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:23.930283', 'step': 13853, 'epoch': 2} {'type': 'loss', 'content': 0.1445920765399933, 'timestamp': '2025-09-10 02:52:23.932456', 'step': 13854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:23.989553', 'step': 13854, 'epoch': 2} {'type': 'loss', 'content': 0.137450709939003, 'timestamp': '2025-09-10 02:52:23.991379', 'step': 13855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:24.048016', 'step': 13855, 'epoch': 2} {'type': 'loss', 'content': 0.17741747200489044, 'timestamp': '2025-09-10 02:52:24.054090', 'step': 13856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:24.111166', 'step': 13856, 'epoch': 2} {'type': 'loss', 'content': 0.12305070459842682, 'timestamp': '2025-09-10 02:52:24.113106', 'step': 13857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:24.167460', 'step': 13857, 'epoch': 2} {'type': 'loss', 'content': 0.13896584510803223, 'timestamp': '2025-09-10 02:52:24.169185', 'step': 13858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:24.222217', 'step': 13858, 'epoch': 2} {'type': 'loss', 'content': 0.16345413029193878, 'timestamp': '2025-09-10 02:52:24.224082', 'step': 13859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:24.278782', 'step': 13859, 'epoch': 2} {'type': 'loss', 'content': 0.07675474882125854, 'timestamp': '2025-09-10 02:52:24.284903', 'step': 13860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:24.338354', 'step': 13860, 'epoch': 2} {'type': 'loss', 'content': 0.1606883853673935, 'timestamp': '2025-09-10 02:52:24.340133', 'step': 13861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:24.394742', 'step': 13861, 'epoch': 2} {'type': 'loss', 'content': 0.14669811725616455, 'timestamp': '2025-09-10 02:52:24.396975', 'step': 13862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:24.453791', 'step': 13862, 'epoch': 2} {'type': 'loss', 'content': 0.17661252617835999, 'timestamp': '2025-09-10 02:52:24.456116', 'step': 13863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:24.514142', 'step': 13863, 'epoch': 2} {'type': 'loss', 'content': 0.13568846881389618, 'timestamp': '2025-09-10 02:52:24.520397', 'step': 13864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:24.576716', 'step': 13864, 'epoch': 2} {'type': 'loss', 'content': 0.19388802349567413, 'timestamp': '2025-09-10 02:52:24.578679', 'step': 13865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:24.634767', 'step': 13865, 'epoch': 2} {'type': 'loss', 'content': 0.07075285911560059, 'timestamp': '2025-09-10 02:52:24.636762', 'step': 13866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:24.691838', 'step': 13866, 'epoch': 2} {'type': 'loss', 'content': 0.15072296559810638, 'timestamp': '2025-09-10 02:52:24.693762', 'step': 13867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:24.748381', 'step': 13867, 'epoch': 2} {'type': 'loss', 'content': 0.06517960876226425, 'timestamp': '2025-09-10 02:52:24.754967', 'step': 13868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:24.810571', 'step': 13868, 'epoch': 2} {'type': 'loss', 'content': 0.08250854909420013, 'timestamp': '2025-09-10 02:52:24.812813', 'step': 13869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:24.869131', 'step': 13869, 'epoch': 2} {'type': 'loss', 'content': 0.11529006063938141, 'timestamp': '2025-09-10 02:52:24.871034', 'step': 13870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:24.927608', 'step': 13870, 'epoch': 2} {'type': 'loss', 'content': 0.05499544367194176, 'timestamp': '2025-09-10 02:52:24.929622', 'step': 13871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:24.984591', 'step': 13871, 'epoch': 2} {'type': 'loss', 'content': 0.11381710320711136, 'timestamp': '2025-09-10 02:52:24.990876', 'step': 13872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:25.044996', 'step': 13872, 'epoch': 2} {'type': 'loss', 'content': 0.07784992456436157, 'timestamp': '2025-09-10 02:52:25.047024', 'step': 13873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:25.102191', 'step': 13873, 'epoch': 2} {'type': 'loss', 'content': 0.1966577172279358, 'timestamp': '2025-09-10 02:52:25.104090', 'step': 13874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:25.158769', 'step': 13874, 'epoch': 2} {'type': 'loss', 'content': 0.10439543426036835, 'timestamp': '2025-09-10 02:52:25.160800', 'step': 13875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:25.216274', 'step': 13875, 'epoch': 2} {'type': 'loss', 'content': 0.13856200873851776, 'timestamp': '2025-09-10 02:52:25.222436', 'step': 13876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:25.275575', 'step': 13876, 'epoch': 2} {'type': 'loss', 'content': 0.07111289352178574, 'timestamp': '2025-09-10 02:52:25.277625', 'step': 13877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:25.332253', 'step': 13877, 'epoch': 2} {'type': 'loss', 'content': 0.06332778930664062, 'timestamp': '2025-09-10 02:52:25.334189', 'step': 13878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:25.389695', 'step': 13878, 'epoch': 2} {'type': 'loss', 'content': 0.15679475665092468, 'timestamp': '2025-09-10 02:52:25.391705', 'step': 13879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:25.446776', 'step': 13879, 'epoch': 2} {'type': 'loss', 'content': 0.09482544660568237, 'timestamp': '2025-09-10 02:52:25.452797', 'step': 13880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:25.508992', 'step': 13880, 'epoch': 2} {'type': 'loss', 'content': 0.08620497584342957, 'timestamp': '2025-09-10 02:52:25.511074', 'step': 13881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:25.566950', 'step': 13881, 'epoch': 2} {'type': 'loss', 'content': 0.17617979645729065, 'timestamp': '2025-09-10 02:52:25.568963', 'step': 13882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:25.626783', 'step': 13882, 'epoch': 2} {'type': 'loss', 'content': 0.0867733582854271, 'timestamp': '2025-09-10 02:52:25.628825', 'step': 13883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:25.685213', 'step': 13883, 'epoch': 2} {'type': 'loss', 'content': 0.11619310826063156, 'timestamp': '2025-09-10 02:52:25.691355', 'step': 13884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:25.744705', 'step': 13884, 'epoch': 2} {'type': 'loss', 'content': 0.03884408250451088, 'timestamp': '2025-09-10 02:52:25.746776', 'step': 13885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:25.803237', 'step': 13885, 'epoch': 2} {'type': 'loss', 'content': 0.0654357448220253, 'timestamp': '2025-09-10 02:52:25.805262', 'step': 13886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:25.859656', 'step': 13886, 'epoch': 2} {'type': 'loss', 'content': 0.09470933675765991, 'timestamp': '2025-09-10 02:52:25.861668', 'step': 13887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:25.916143', 'step': 13887, 'epoch': 2} {'type': 'loss', 'content': 0.16660019755363464, 'timestamp': '2025-09-10 02:52:25.922192', 'step': 13888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:25.975525', 'step': 13888, 'epoch': 2} {'type': 'loss', 'content': 0.08322439342737198, 'timestamp': '2025-09-10 02:52:25.977531', 'step': 13889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:26.032654', 'step': 13889, 'epoch': 2} {'type': 'loss', 'content': 0.14087656140327454, 'timestamp': '2025-09-10 02:52:26.034786', 'step': 13890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:26.091036', 'step': 13890, 'epoch': 2} {'type': 'loss', 'content': 0.11595314741134644, 'timestamp': '2025-09-10 02:52:26.093148', 'step': 13891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:26.148267', 'step': 13891, 'epoch': 2} {'type': 'loss', 'content': 0.14147070050239563, 'timestamp': '2025-09-10 02:52:26.154456', 'step': 13892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:26.211303', 'step': 13892, 'epoch': 2} {'type': 'loss', 'content': 0.08230547606945038, 'timestamp': '2025-09-10 02:52:26.213324', 'step': 13893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:26.269411', 'step': 13893, 'epoch': 2} {'type': 'loss', 'content': 0.07054159045219421, 'timestamp': '2025-09-10 02:52:26.271320', 'step': 13894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:26.325756', 'step': 13894, 'epoch': 2} {'type': 'loss', 'content': 0.14615947008132935, 'timestamp': '2025-09-10 02:52:26.327717', 'step': 13895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:26.383962', 'step': 13895, 'epoch': 2} {'type': 'loss', 'content': 0.16674484312534332, 'timestamp': '2025-09-10 02:52:26.390032', 'step': 13896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:26.445351', 'step': 13896, 'epoch': 2} {'type': 'loss', 'content': 0.15511156618595123, 'timestamp': '2025-09-10 02:52:26.447370', 'step': 13897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:26.504145', 'step': 13897, 'epoch': 2} {'type': 'loss', 'content': 0.19049334526062012, 'timestamp': '2025-09-10 02:52:26.506053', 'step': 13898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:26.561472', 'step': 13898, 'epoch': 2} {'type': 'loss', 'content': 0.11171377450227737, 'timestamp': '2025-09-10 02:52:26.563498', 'step': 13899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:26.620367', 'step': 13899, 'epoch': 2} {'type': 'loss', 'content': 0.07104805111885071, 'timestamp': '2025-09-10 02:52:26.626643', 'step': 13900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:26.681340', 'step': 13900, 'epoch': 2} {'type': 'loss', 'content': 0.13729895651340485, 'timestamp': '2025-09-10 02:52:26.683334', 'step': 13901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:26.738975', 'step': 13901, 'epoch': 2} {'type': 'loss', 'content': 0.11551064252853394, 'timestamp': '2025-09-10 02:52:26.740947', 'step': 13902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:26.796003', 'step': 13902, 'epoch': 2} {'type': 'loss', 'content': 0.0788450762629509, 'timestamp': '2025-09-10 02:52:26.797934', 'step': 13903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:26.852716', 'step': 13903, 'epoch': 2} {'type': 'loss', 'content': 0.13823705911636353, 'timestamp': '2025-09-10 02:52:26.859014', 'step': 13904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:26.917121', 'step': 13904, 'epoch': 2} {'type': 'loss', 'content': 0.14610403776168823, 'timestamp': '2025-09-10 02:52:26.919426', 'step': 13905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:26.974370', 'step': 13905, 'epoch': 2} {'type': 'loss', 'content': 0.1777297556400299, 'timestamp': '2025-09-10 02:52:26.976514', 'step': 13906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:27.031400', 'step': 13906, 'epoch': 2} {'type': 'loss', 'content': 0.18526990711688995, 'timestamp': '2025-09-10 02:52:27.033435', 'step': 13907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:27.088470', 'step': 13907, 'epoch': 2} {'type': 'loss', 'content': 0.17523151636123657, 'timestamp': '2025-09-10 02:52:27.094657', 'step': 13908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:27.153556', 'step': 13908, 'epoch': 2} {'type': 'loss', 'content': 0.08623894304037094, 'timestamp': '2025-09-10 02:52:27.155621', 'step': 13909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:27.210843', 'step': 13909, 'epoch': 2} {'type': 'loss', 'content': 0.0778408795595169, 'timestamp': '2025-09-10 02:52:27.212960', 'step': 13910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:27.268230', 'step': 13910, 'epoch': 2} {'type': 'loss', 'content': 0.08416572958230972, 'timestamp': '2025-09-10 02:52:27.270349', 'step': 13911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:27.327164', 'step': 13911, 'epoch': 2} {'type': 'loss', 'content': 0.12220439314842224, 'timestamp': '2025-09-10 02:52:27.333364', 'step': 13912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:27.387382', 'step': 13912, 'epoch': 2} {'type': 'loss', 'content': 0.12634681165218353, 'timestamp': '2025-09-10 02:52:27.389419', 'step': 13913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:27.448285', 'step': 13913, 'epoch': 2} {'type': 'loss', 'content': 0.11469127237796783, 'timestamp': '2025-09-10 02:52:27.450418', 'step': 13914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:27.505219', 'step': 13914, 'epoch': 2} {'type': 'loss', 'content': 0.08491041511297226, 'timestamp': '2025-09-10 02:52:27.507314', 'step': 13915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:27.563560', 'step': 13915, 'epoch': 2} {'type': 'loss', 'content': 0.12560509145259857, 'timestamp': '2025-09-10 02:52:27.569601', 'step': 13916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:27.626963', 'step': 13916, 'epoch': 2} {'type': 'loss', 'content': 0.10779207944869995, 'timestamp': '2025-09-10 02:52:27.629171', 'step': 13917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:27.687368', 'step': 13917, 'epoch': 2} {'type': 'loss', 'content': 0.07761424779891968, 'timestamp': '2025-09-10 02:52:27.690553', 'step': 13918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:27.749168', 'step': 13918, 'epoch': 2} {'type': 'loss', 'content': 0.156245619058609, 'timestamp': '2025-09-10 02:52:27.755045', 'step': 13919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:27.813123', 'step': 13919, 'epoch': 2} {'type': 'loss', 'content': 0.07917577028274536, 'timestamp': '2025-09-10 02:52:27.819449', 'step': 13920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:27.874674', 'step': 13920, 'epoch': 2} {'type': 'loss', 'content': 0.11920293420553207, 'timestamp': '2025-09-10 02:52:27.877428', 'step': 13921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:27.934509', 'step': 13921, 'epoch': 2} {'type': 'loss', 'content': 0.07663074135780334, 'timestamp': '2025-09-10 02:52:27.936585', 'step': 13922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:27.996005', 'step': 13922, 'epoch': 2} {'type': 'loss', 'content': 0.14281868934631348, 'timestamp': '2025-09-10 02:52:27.998118', 'step': 13923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:28.053615', 'step': 13923, 'epoch': 2} {'type': 'loss', 'content': 0.14649727940559387, 'timestamp': '2025-09-10 02:52:28.059703', 'step': 13924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:28.113661', 'step': 13924, 'epoch': 2} {'type': 'loss', 'content': 0.15820394456386566, 'timestamp': '2025-09-10 02:52:28.119504', 'step': 13925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:28.187335', 'step': 13925, 'epoch': 2} {'type': 'loss', 'content': 0.053947146981954575, 'timestamp': '2025-09-10 02:52:28.192221', 'step': 13926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:28.251537', 'step': 13926, 'epoch': 2} {'type': 'loss', 'content': 0.08121171593666077, 'timestamp': '2025-09-10 02:52:28.253665', 'step': 13927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:28.311657', 'step': 13927, 'epoch': 2} {'type': 'loss', 'content': 0.17633754014968872, 'timestamp': '2025-09-10 02:52:28.317962', 'step': 13928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:28.375411', 'step': 13928, 'epoch': 2} {'type': 'loss', 'content': 0.10370928794145584, 'timestamp': '2025-09-10 02:52:28.378034', 'step': 13929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:28.434181', 'step': 13929, 'epoch': 2} {'type': 'loss', 'content': 0.05863938480615616, 'timestamp': '2025-09-10 02:52:28.436256', 'step': 13930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:28.490918', 'step': 13930, 'epoch': 2} {'type': 'loss', 'content': 0.10545282065868378, 'timestamp': '2025-09-10 02:52:28.493112', 'step': 13931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:28.547754', 'step': 13931, 'epoch': 2} {'type': 'loss', 'content': 0.05430855602025986, 'timestamp': '2025-09-10 02:52:28.553966', 'step': 13932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:28.609072', 'step': 13932, 'epoch': 2} {'type': 'loss', 'content': 0.08221634477376938, 'timestamp': '2025-09-10 02:52:28.611075', 'step': 13933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:28.666684', 'step': 13933, 'epoch': 2} {'type': 'loss', 'content': 0.09809911996126175, 'timestamp': '2025-09-10 02:52:28.668937', 'step': 13934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:28.725806', 'step': 13934, 'epoch': 2} {'type': 'loss', 'content': 0.13767504692077637, 'timestamp': '2025-09-10 02:52:28.728128', 'step': 13935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:28.785693', 'step': 13935, 'epoch': 2} {'type': 'loss', 'content': 0.0755356028676033, 'timestamp': '2025-09-10 02:52:28.791909', 'step': 13936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:28.847726', 'step': 13936, 'epoch': 2} {'type': 'loss', 'content': 0.09020976722240448, 'timestamp': '2025-09-10 02:52:28.849729', 'step': 13937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:28.904496', 'step': 13937, 'epoch': 2} {'type': 'loss', 'content': 0.09842737764120102, 'timestamp': '2025-09-10 02:52:28.906604', 'step': 13938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:28.960859', 'step': 13938, 'epoch': 2} {'type': 'loss', 'content': 0.1567399501800537, 'timestamp': '2025-09-10 02:52:28.963095', 'step': 13939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:29.018457', 'step': 13939, 'epoch': 2} {'type': 'loss', 'content': 0.11261392384767532, 'timestamp': '2025-09-10 02:52:29.024543', 'step': 13940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:29.080025', 'step': 13940, 'epoch': 2} {'type': 'loss', 'content': 0.17084601521492004, 'timestamp': '2025-09-10 02:52:29.082157', 'step': 13941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:29.137515', 'step': 13941, 'epoch': 2} {'type': 'loss', 'content': 0.1191461905837059, 'timestamp': '2025-09-10 02:52:29.139629', 'step': 13942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:29.196726', 'step': 13942, 'epoch': 2} {'type': 'loss', 'content': 0.12115688621997833, 'timestamp': '2025-09-10 02:52:29.198804', 'step': 13943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:29.255032', 'step': 13943, 'epoch': 2} {'type': 'loss', 'content': 0.10081282258033752, 'timestamp': '2025-09-10 02:52:29.261496', 'step': 13944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:29.318067', 'step': 13944, 'epoch': 2} {'type': 'loss', 'content': 0.1608918458223343, 'timestamp': '2025-09-10 02:52:29.320046', 'step': 13945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:29.375440', 'step': 13945, 'epoch': 2} {'type': 'loss', 'content': 0.11846444010734558, 'timestamp': '2025-09-10 02:52:29.377509', 'step': 13946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:29.434727', 'step': 13946, 'epoch': 2} {'type': 'loss', 'content': 0.15612463653087616, 'timestamp': '2025-09-10 02:52:29.436780', 'step': 13947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:29.492760', 'step': 13947, 'epoch': 2} {'type': 'loss', 'content': 0.10022461414337158, 'timestamp': '2025-09-10 02:52:29.499151', 'step': 13948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:29.553915', 'step': 13948, 'epoch': 2} {'type': 'loss', 'content': 0.10446722060441971, 'timestamp': '2025-09-10 02:52:29.556147', 'step': 13949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:29.610207', 'step': 13949, 'epoch': 2} {'type': 'loss', 'content': 0.08919323235750198, 'timestamp': '2025-09-10 02:52:29.612361', 'step': 13950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:29.665850', 'step': 13950, 'epoch': 2} {'type': 'loss', 'content': 0.08664007484912872, 'timestamp': '2025-09-10 02:52:29.667933', 'step': 13951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:29.724295', 'step': 13951, 'epoch': 2} {'type': 'loss', 'content': 0.11990675330162048, 'timestamp': '2025-09-10 02:52:29.730290', 'step': 13952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:29.786665', 'step': 13952, 'epoch': 2} {'type': 'loss', 'content': 0.16275867819786072, 'timestamp': '2025-09-10 02:52:29.788735', 'step': 13953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:29.845629', 'step': 13953, 'epoch': 2} {'type': 'loss', 'content': 0.10026320070028305, 'timestamp': '2025-09-10 02:52:29.847709', 'step': 13954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:29.903510', 'step': 13954, 'epoch': 2} {'type': 'loss', 'content': 0.12302252650260925, 'timestamp': '2025-09-10 02:52:29.905702', 'step': 13955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:29.964331', 'step': 13955, 'epoch': 2} {'type': 'loss', 'content': 0.10038015246391296, 'timestamp': '2025-09-10 02:52:29.970534', 'step': 13956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:30.025031', 'step': 13956, 'epoch': 2} {'type': 'loss', 'content': 0.11883904039859772, 'timestamp': '2025-09-10 02:52:30.027099', 'step': 13957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:30.082597', 'step': 13957, 'epoch': 2} {'type': 'loss', 'content': 0.1410091668367386, 'timestamp': '2025-09-10 02:52:30.084641', 'step': 13958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:30.138635', 'step': 13958, 'epoch': 2} {'type': 'loss', 'content': 0.14229020476341248, 'timestamp': '2025-09-10 02:52:30.140865', 'step': 13959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:30.196602', 'step': 13959, 'epoch': 2} {'type': 'loss', 'content': 0.2141224890947342, 'timestamp': '2025-09-10 02:52:30.202680', 'step': 13960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:30.257924', 'step': 13960, 'epoch': 2} {'type': 'loss', 'content': 0.16488423943519592, 'timestamp': '2025-09-10 02:52:30.260100', 'step': 13961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:30.313527', 'step': 13961, 'epoch': 2} {'type': 'loss', 'content': 0.1270524561405182, 'timestamp': '2025-09-10 02:52:30.316050', 'step': 13962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:30.370980', 'step': 13962, 'epoch': 2} {'type': 'loss', 'content': 0.129482701420784, 'timestamp': '2025-09-10 02:52:30.373178', 'step': 13963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:30.428391', 'step': 13963, 'epoch': 2} {'type': 'loss', 'content': 0.15923812985420227, 'timestamp': '2025-09-10 02:52:30.434776', 'step': 13964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:30.489396', 'step': 13964, 'epoch': 2} {'type': 'loss', 'content': 0.08657215535640717, 'timestamp': '2025-09-10 02:52:30.491479', 'step': 13965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:30.546339', 'step': 13965, 'epoch': 2} {'type': 'loss', 'content': 0.08575443178415298, 'timestamp': '2025-09-10 02:52:30.548467', 'step': 13966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:30.603352', 'step': 13966, 'epoch': 2} {'type': 'loss', 'content': 0.1228303387761116, 'timestamp': '2025-09-10 02:52:30.605467', 'step': 13967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:30.659670', 'step': 13967, 'epoch': 2} {'type': 'loss', 'content': 0.1695258468389511, 'timestamp': '2025-09-10 02:52:30.665781', 'step': 13968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:30.720230', 'step': 13968, 'epoch': 2} {'type': 'loss', 'content': 0.17236927151679993, 'timestamp': '2025-09-10 02:52:30.722285', 'step': 13969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:30.781642', 'step': 13969, 'epoch': 2} {'type': 'loss', 'content': 0.07835046201944351, 'timestamp': '2025-09-10 02:52:30.783845', 'step': 13970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:30.838571', 'step': 13970, 'epoch': 2} {'type': 'loss', 'content': 0.11675921082496643, 'timestamp': '2025-09-10 02:52:30.840769', 'step': 13971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:30.895569', 'step': 13971, 'epoch': 2} {'type': 'loss', 'content': 0.13411679863929749, 'timestamp': '2025-09-10 02:52:30.901910', 'step': 13972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:30.957151', 'step': 13972, 'epoch': 2} {'type': 'loss', 'content': 0.0600915290415287, 'timestamp': '2025-09-10 02:52:30.959407', 'step': 13973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:31.013938', 'step': 13973, 'epoch': 2} {'type': 'loss', 'content': 0.15288037061691284, 'timestamp': '2025-09-10 02:52:31.016192', 'step': 13974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:31.070093', 'step': 13974, 'epoch': 2} {'type': 'loss', 'content': 0.116812564432621, 'timestamp': '2025-09-10 02:52:31.072069', 'step': 13975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:31.125668', 'step': 13975, 'epoch': 2} {'type': 'loss', 'content': 0.15527354180812836, 'timestamp': '2025-09-10 02:52:31.131467', 'step': 13976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:31.184563', 'step': 13976, 'epoch': 2} {'type': 'loss', 'content': 0.08370806276798248, 'timestamp': '2025-09-10 02:52:31.187121', 'step': 13977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:31.242536', 'step': 13977, 'epoch': 2} {'type': 'loss', 'content': 0.1234361082315445, 'timestamp': '2025-09-10 02:52:31.244939', 'step': 13978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:31.300960', 'step': 13978, 'epoch': 2} {'type': 'loss', 'content': 0.17647004127502441, 'timestamp': '2025-09-10 02:52:31.303205', 'step': 13979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:31.358026', 'step': 13979, 'epoch': 2} {'type': 'loss', 'content': 0.09708847850561142, 'timestamp': '2025-09-10 02:52:31.364824', 'step': 13980, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:52:44.237022', 'step': 13980, 'epoch': 2} {'type': 'pplx', 'content': 13303.905542544337, 'timestamp': '2025-09-10 02:52:44.240041', 'step': 13980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:44.294569', 'step': 13980, 'epoch': 2} {'type': 'loss', 'content': 0.08871874958276749, 'timestamp': '2025-09-10 02:52:44.296914', 'step': 13981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:44.353023', 'step': 13981, 'epoch': 2} {'type': 'loss', 'content': 0.14523828029632568, 'timestamp': '2025-09-10 02:52:44.355346', 'step': 13982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:44.411829', 'step': 13982, 'epoch': 2} {'type': 'loss', 'content': 0.10233356058597565, 'timestamp': '2025-09-10 02:52:44.414199', 'step': 13983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:44.468867', 'step': 13983, 'epoch': 2} {'type': 'loss', 'content': 0.15349461138248444, 'timestamp': '2025-09-10 02:52:44.475346', 'step': 13984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:44.529627', 'step': 13984, 'epoch': 2} {'type': 'loss', 'content': 0.08839812129735947, 'timestamp': '2025-09-10 02:52:44.531821', 'step': 13985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:44.585643', 'step': 13985, 'epoch': 2} {'type': 'loss', 'content': 0.0544896200299263, 'timestamp': '2025-09-10 02:52:44.588094', 'step': 13986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:44.642071', 'step': 13986, 'epoch': 2} {'type': 'loss', 'content': 0.17048555612564087, 'timestamp': '2025-09-10 02:52:44.644526', 'step': 13987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:44.698786', 'step': 13987, 'epoch': 2} {'type': 'loss', 'content': 0.07213987410068512, 'timestamp': '2025-09-10 02:52:44.704995', 'step': 13988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:44.758640', 'step': 13988, 'epoch': 2} {'type': 'loss', 'content': 0.11800084263086319, 'timestamp': '2025-09-10 02:52:44.760914', 'step': 13989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:44.815774', 'step': 13989, 'epoch': 2} {'type': 'loss', 'content': 0.23578932881355286, 'timestamp': '2025-09-10 02:52:44.818084', 'step': 13990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:44.871904', 'step': 13990, 'epoch': 2} {'type': 'loss', 'content': 0.1325947493314743, 'timestamp': '2025-09-10 02:52:44.874157', 'step': 13991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:44.928093', 'step': 13991, 'epoch': 2} {'type': 'loss', 'content': 0.12296950817108154, 'timestamp': '2025-09-10 02:52:44.934415', 'step': 13992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:44.987607', 'step': 13992, 'epoch': 2} {'type': 'loss', 'content': 0.0972055196762085, 'timestamp': '2025-09-10 02:52:44.989870', 'step': 13993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:45.043359', 'step': 13993, 'epoch': 2} {'type': 'loss', 'content': 0.09346339851617813, 'timestamp': '2025-09-10 02:52:45.045631', 'step': 13994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:45.099938', 'step': 13994, 'epoch': 2} {'type': 'loss', 'content': 0.039743293076753616, 'timestamp': '2025-09-10 02:52:45.102183', 'step': 13995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:45.156492', 'step': 13995, 'epoch': 2} {'type': 'loss', 'content': 0.10132042318582535, 'timestamp': '2025-09-10 02:52:45.162685', 'step': 13996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:45.215831', 'step': 13996, 'epoch': 2} {'type': 'loss', 'content': 0.04057711735367775, 'timestamp': '2025-09-10 02:52:45.218116', 'step': 13997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:45.271975', 'step': 13997, 'epoch': 2} {'type': 'loss', 'content': 0.11509045958518982, 'timestamp': '2025-09-10 02:52:45.274231', 'step': 13998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:45.328100', 'step': 13998, 'epoch': 2} {'type': 'loss', 'content': 0.13987156748771667, 'timestamp': '2025-09-10 02:52:45.330486', 'step': 13999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:45.384307', 'step': 13999, 'epoch': 2} {'type': 'loss', 'content': 0.10778628289699554, 'timestamp': '2025-09-10 02:52:45.390731', 'step': 14000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 14000', 'timestamp': '2025-09-10 02:52:45.776535', 'step': 14000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:45.836298', 'step': 14000, 'epoch': 2} {'type': 'loss', 'content': 0.1542031615972519, 'timestamp': '2025-09-10 02:52:45.838716', 'step': 14001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:45.897208', 'step': 14001, 'epoch': 2} {'type': 'loss', 'content': 0.1100136786699295, 'timestamp': '2025-09-10 02:52:45.899635', 'step': 14002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:45.958077', 'step': 14002, 'epoch': 2} {'type': 'loss', 'content': 0.14303311705589294, 'timestamp': '2025-09-10 02:52:45.960358', 'step': 14003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:46.016694', 'step': 14003, 'epoch': 2} {'type': 'loss', 'content': 0.10886017978191376, 'timestamp': '2025-09-10 02:52:46.023475', 'step': 14004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:46.078934', 'step': 14004, 'epoch': 2} {'type': 'loss', 'content': 0.058658335357904434, 'timestamp': '2025-09-10 02:52:46.081290', 'step': 14005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:46.137260', 'step': 14005, 'epoch': 2} {'type': 'loss', 'content': 0.10211215913295746, 'timestamp': '2025-09-10 02:52:46.139659', 'step': 14006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:46.196005', 'step': 14006, 'epoch': 2} {'type': 'loss', 'content': 0.03592858090996742, 'timestamp': '2025-09-10 02:52:46.198319', 'step': 14007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:46.257298', 'step': 14007, 'epoch': 2} {'type': 'loss', 'content': 0.11211791634559631, 'timestamp': '2025-09-10 02:52:46.263938', 'step': 14008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:46.319021', 'step': 14008, 'epoch': 2} {'type': 'loss', 'content': 0.12287141382694244, 'timestamp': '2025-09-10 02:52:46.321120', 'step': 14009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:46.374997', 'step': 14009, 'epoch': 2} {'type': 'loss', 'content': 0.16411545872688293, 'timestamp': '2025-09-10 02:52:46.377306', 'step': 14010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:46.431170', 'step': 14010, 'epoch': 2} {'type': 'loss', 'content': 0.1383531391620636, 'timestamp': '2025-09-10 02:52:46.433791', 'step': 14011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:46.487215', 'step': 14011, 'epoch': 2} {'type': 'loss', 'content': 0.12245231121778488, 'timestamp': '2025-09-10 02:52:46.493921', 'step': 14012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:46.547334', 'step': 14012, 'epoch': 2} {'type': 'loss', 'content': 0.12820999324321747, 'timestamp': '2025-09-10 02:52:46.549751', 'step': 14013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:46.604552', 'step': 14013, 'epoch': 2} {'type': 'loss', 'content': 0.023626312613487244, 'timestamp': '2025-09-10 02:52:46.607035', 'step': 14014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:46.660911', 'step': 14014, 'epoch': 2} {'type': 'loss', 'content': 0.10266613960266113, 'timestamp': '2025-09-10 02:52:46.663190', 'step': 14015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:46.716398', 'step': 14015, 'epoch': 2} {'type': 'loss', 'content': 0.19175595045089722, 'timestamp': '2025-09-10 02:52:46.722434', 'step': 14016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:46.776159', 'step': 14016, 'epoch': 2} {'type': 'loss', 'content': 0.0787220224738121, 'timestamp': '2025-09-10 02:52:46.778195', 'step': 14017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:46.832289', 'step': 14017, 'epoch': 2} {'type': 'loss', 'content': 0.11217355728149414, 'timestamp': '2025-09-10 02:52:46.834342', 'step': 14018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:46.888767', 'step': 14018, 'epoch': 2} {'type': 'loss', 'content': 0.16150376200675964, 'timestamp': '2025-09-10 02:52:46.890934', 'step': 14019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:46.944740', 'step': 14019, 'epoch': 2} {'type': 'loss', 'content': 0.09106221050024033, 'timestamp': '2025-09-10 02:52:46.951006', 'step': 14020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:47.004625', 'step': 14020, 'epoch': 2} {'type': 'loss', 'content': 0.12306149303913116, 'timestamp': '2025-09-10 02:52:47.006872', 'step': 14021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:47.061091', 'step': 14021, 'epoch': 2} {'type': 'loss', 'content': 0.12459726631641388, 'timestamp': '2025-09-10 02:52:47.063360', 'step': 14022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:47.119213', 'step': 14022, 'epoch': 2} {'type': 'loss', 'content': 0.1432049423456192, 'timestamp': '2025-09-10 02:52:47.121934', 'step': 14023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:47.182366', 'step': 14023, 'epoch': 2} {'type': 'loss', 'content': 0.17529016733169556, 'timestamp': '2025-09-10 02:52:47.189784', 'step': 14024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:47.251834', 'step': 14024, 'epoch': 2} {'type': 'loss', 'content': 0.09833967685699463, 'timestamp': '2025-09-10 02:52:47.254147', 'step': 14025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:47.316830', 'step': 14025, 'epoch': 2} {'type': 'loss', 'content': 0.09599101543426514, 'timestamp': '2025-09-10 02:52:47.319203', 'step': 14026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:47.380004', 'step': 14026, 'epoch': 2} {'type': 'loss', 'content': 0.10792827606201172, 'timestamp': '2025-09-10 02:52:47.382354', 'step': 14027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:47.441896', 'step': 14027, 'epoch': 2} {'type': 'loss', 'content': 0.10260605067014694, 'timestamp': '2025-09-10 02:52:47.448974', 'step': 14028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:47.510485', 'step': 14028, 'epoch': 2} {'type': 'loss', 'content': 0.12914231419563293, 'timestamp': '2025-09-10 02:52:47.512880', 'step': 14029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:47.567798', 'step': 14029, 'epoch': 2} {'type': 'loss', 'content': 0.16206762194633484, 'timestamp': '2025-09-10 02:52:47.570016', 'step': 14030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:47.624883', 'step': 14030, 'epoch': 2} {'type': 'loss', 'content': 0.1202511116862297, 'timestamp': '2025-09-10 02:52:47.627266', 'step': 14031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:47.681735', 'step': 14031, 'epoch': 2} {'type': 'loss', 'content': 0.11766530573368073, 'timestamp': '2025-09-10 02:52:47.687926', 'step': 14032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:47.743128', 'step': 14032, 'epoch': 2} {'type': 'loss', 'content': 0.07458928972482681, 'timestamp': '2025-09-10 02:52:47.745460', 'step': 14033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:47.798613', 'step': 14033, 'epoch': 2} {'type': 'loss', 'content': 0.09326710551977158, 'timestamp': '2025-09-10 02:52:47.800929', 'step': 14034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:47.856192', 'step': 14034, 'epoch': 2} {'type': 'loss', 'content': 0.14759758114814758, 'timestamp': '2025-09-10 02:52:47.858451', 'step': 14035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:47.911866', 'step': 14035, 'epoch': 2} {'type': 'loss', 'content': 0.11198915541172028, 'timestamp': '2025-09-10 02:52:47.917922', 'step': 14036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:47.970889', 'step': 14036, 'epoch': 2} {'type': 'loss', 'content': 0.08703092485666275, 'timestamp': '2025-09-10 02:52:47.973183', 'step': 14037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:48.027738', 'step': 14037, 'epoch': 2} {'type': 'loss', 'content': 0.1053614616394043, 'timestamp': '2025-09-10 02:52:48.030061', 'step': 14038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:48.083837', 'step': 14038, 'epoch': 2} {'type': 'loss', 'content': 0.087308868765831, 'timestamp': '2025-09-10 02:52:48.086114', 'step': 14039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:48.139662', 'step': 14039, 'epoch': 2} {'type': 'loss', 'content': 0.16266685724258423, 'timestamp': '2025-09-10 02:52:48.145702', 'step': 14040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:48.198775', 'step': 14040, 'epoch': 2} {'type': 'loss', 'content': 0.15232834219932556, 'timestamp': '2025-09-10 02:52:48.201050', 'step': 14041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:48.254356', 'step': 14041, 'epoch': 2} {'type': 'loss', 'content': 0.07675792276859283, 'timestamp': '2025-09-10 02:52:48.256893', 'step': 14042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:48.311331', 'step': 14042, 'epoch': 2} {'type': 'loss', 'content': 0.07143818587064743, 'timestamp': '2025-09-10 02:52:48.313732', 'step': 14043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:48.366627', 'step': 14043, 'epoch': 2} {'type': 'loss', 'content': 0.13541965186595917, 'timestamp': '2025-09-10 02:52:48.372635', 'step': 14044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:48.426355', 'step': 14044, 'epoch': 2} {'type': 'loss', 'content': 0.20352645218372345, 'timestamp': '2025-09-10 02:52:48.428554', 'step': 14045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:48.482517', 'step': 14045, 'epoch': 2} {'type': 'loss', 'content': 0.04913463816046715, 'timestamp': '2025-09-10 02:52:48.484776', 'step': 14046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:48.538307', 'step': 14046, 'epoch': 2} {'type': 'loss', 'content': 0.1103198230266571, 'timestamp': '2025-09-10 02:52:48.540560', 'step': 14047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:48.594507', 'step': 14047, 'epoch': 2} {'type': 'loss', 'content': 0.14086660742759705, 'timestamp': '2025-09-10 02:52:48.600422', 'step': 14048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:48.653254', 'step': 14048, 'epoch': 2} {'type': 'loss', 'content': 0.11600061506032944, 'timestamp': '2025-09-10 02:52:48.655555', 'step': 14049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:48.709402', 'step': 14049, 'epoch': 2} {'type': 'loss', 'content': 0.0968887135386467, 'timestamp': '2025-09-10 02:52:48.711665', 'step': 14050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:48.768373', 'step': 14050, 'epoch': 2} {'type': 'loss', 'content': 0.12249614298343658, 'timestamp': '2025-09-10 02:52:48.770816', 'step': 14051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:48.829812', 'step': 14051, 'epoch': 2} {'type': 'loss', 'content': 0.1900760978460312, 'timestamp': '2025-09-10 02:52:48.835671', 'step': 14052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:48.888940', 'step': 14052, 'epoch': 2} {'type': 'loss', 'content': 0.08372151106595993, 'timestamp': '2025-09-10 02:52:48.891342', 'step': 14053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:48.944507', 'step': 14053, 'epoch': 2} {'type': 'loss', 'content': 0.12807102501392365, 'timestamp': '2025-09-10 02:52:48.946562', 'step': 14054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:48.999817', 'step': 14054, 'epoch': 2} {'type': 'loss', 'content': 0.1009589284658432, 'timestamp': '2025-09-10 02:52:49.001980', 'step': 14055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:49.057193', 'step': 14055, 'epoch': 2} {'type': 'loss', 'content': 0.08203984051942825, 'timestamp': '2025-09-10 02:52:49.063380', 'step': 14056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:49.118180', 'step': 14056, 'epoch': 2} {'type': 'loss', 'content': 0.1485816389322281, 'timestamp': '2025-09-10 02:52:49.120652', 'step': 14057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:49.174361', 'step': 14057, 'epoch': 2} {'type': 'loss', 'content': 0.03018328920006752, 'timestamp': '2025-09-10 02:52:49.176685', 'step': 14058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:49.230275', 'step': 14058, 'epoch': 2} {'type': 'loss', 'content': 0.06403355300426483, 'timestamp': '2025-09-10 02:52:49.232518', 'step': 14059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:49.286047', 'step': 14059, 'epoch': 2} {'type': 'loss', 'content': 0.07689405232667923, 'timestamp': '2025-09-10 02:52:49.291949', 'step': 14060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:49.345704', 'step': 14060, 'epoch': 2} {'type': 'loss', 'content': 0.10012288391590118, 'timestamp': '2025-09-10 02:52:49.348046', 'step': 14061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:49.401605', 'step': 14061, 'epoch': 2} {'type': 'loss', 'content': 0.12314619868993759, 'timestamp': '2025-09-10 02:52:49.403812', 'step': 14062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:49.459805', 'step': 14062, 'epoch': 2} {'type': 'loss', 'content': 0.16226904094219208, 'timestamp': '2025-09-10 02:52:49.462011', 'step': 14063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:49.517791', 'step': 14063, 'epoch': 2} {'type': 'loss', 'content': 0.22446787357330322, 'timestamp': '2025-09-10 02:52:49.523664', 'step': 14064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:49.576859', 'step': 14064, 'epoch': 2} {'type': 'loss', 'content': 0.057553231716156006, 'timestamp': '2025-09-10 02:52:49.579078', 'step': 14065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:49.633449', 'step': 14065, 'epoch': 2} {'type': 'loss', 'content': 0.07511646300554276, 'timestamp': '2025-09-10 02:52:49.635748', 'step': 14066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:49.689102', 'step': 14066, 'epoch': 2} {'type': 'loss', 'content': 0.07611407339572906, 'timestamp': '2025-09-10 02:52:49.691458', 'step': 14067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:49.745575', 'step': 14067, 'epoch': 2} {'type': 'loss', 'content': 0.12853996455669403, 'timestamp': '2025-09-10 02:52:49.751427', 'step': 14068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:49.804756', 'step': 14068, 'epoch': 2} {'type': 'loss', 'content': 0.12549318373203278, 'timestamp': '2025-09-10 02:52:49.806800', 'step': 14069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:49.860387', 'step': 14069, 'epoch': 2} {'type': 'loss', 'content': 0.18767403066158295, 'timestamp': '2025-09-10 02:52:49.862364', 'step': 14070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:49.917265', 'step': 14070, 'epoch': 2} {'type': 'loss', 'content': 0.1253870725631714, 'timestamp': '2025-09-10 02:52:49.919466', 'step': 14071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:49.972890', 'step': 14071, 'epoch': 2} {'type': 'loss', 'content': 0.09691255539655685, 'timestamp': '2025-09-10 02:52:49.979146', 'step': 14072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:50.032848', 'step': 14072, 'epoch': 2} {'type': 'loss', 'content': 0.11305349320173264, 'timestamp': '2025-09-10 02:52:50.035043', 'step': 14073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:50.089047', 'step': 14073, 'epoch': 2} {'type': 'loss', 'content': 0.10840049386024475, 'timestamp': '2025-09-10 02:52:50.091101', 'step': 14074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:50.145440', 'step': 14074, 'epoch': 2} {'type': 'loss', 'content': 0.06781944632530212, 'timestamp': '2025-09-10 02:52:50.147544', 'step': 14075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:50.200907', 'step': 14075, 'epoch': 2} {'type': 'loss', 'content': 0.14661437273025513, 'timestamp': '2025-09-10 02:52:50.206873', 'step': 14076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:50.260360', 'step': 14076, 'epoch': 2} {'type': 'loss', 'content': 0.0689973458647728, 'timestamp': '2025-09-10 02:52:50.263030', 'step': 14077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:50.316788', 'step': 14077, 'epoch': 2} {'type': 'loss', 'content': 0.03860285505652428, 'timestamp': '2025-09-10 02:52:50.318798', 'step': 14078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:50.372759', 'step': 14078, 'epoch': 2} {'type': 'loss', 'content': 0.06193209066987038, 'timestamp': '2025-09-10 02:52:50.375085', 'step': 14079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:50.429659', 'step': 14079, 'epoch': 2} {'type': 'loss', 'content': 0.06574933975934982, 'timestamp': '2025-09-10 02:52:50.435467', 'step': 14080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:50.488769', 'step': 14080, 'epoch': 2} {'type': 'loss', 'content': 0.10148292779922485, 'timestamp': '2025-09-10 02:52:50.491035', 'step': 14081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:50.544462', 'step': 14081, 'epoch': 2} {'type': 'loss', 'content': 0.10569103062152863, 'timestamp': '2025-09-10 02:52:50.546613', 'step': 14082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:50.599965', 'step': 14082, 'epoch': 2} {'type': 'loss', 'content': 0.17545078694820404, 'timestamp': '2025-09-10 02:52:50.602060', 'step': 14083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:50.657053', 'step': 14083, 'epoch': 2} {'type': 'loss', 'content': 0.06564483791589737, 'timestamp': '2025-09-10 02:52:50.663096', 'step': 14084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:50.715981', 'step': 14084, 'epoch': 2} {'type': 'loss', 'content': 0.15630356967449188, 'timestamp': '2025-09-10 02:52:50.718494', 'step': 14085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:50.771844', 'step': 14085, 'epoch': 2} {'type': 'loss', 'content': 0.13224747776985168, 'timestamp': '2025-09-10 02:52:50.774094', 'step': 14086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:50.828868', 'step': 14086, 'epoch': 2} {'type': 'loss', 'content': 0.05248308926820755, 'timestamp': '2025-09-10 02:52:50.831193', 'step': 14087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:50.883960', 'step': 14087, 'epoch': 2} {'type': 'loss', 'content': 0.12711140513420105, 'timestamp': '2025-09-10 02:52:50.889666', 'step': 14088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:50.942529', 'step': 14088, 'epoch': 2} {'type': 'loss', 'content': 0.19810020923614502, 'timestamp': '2025-09-10 02:52:50.944601', 'step': 14089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:50.997723', 'step': 14089, 'epoch': 2} {'type': 'loss', 'content': 0.10484223067760468, 'timestamp': '2025-09-10 02:52:50.999944', 'step': 14090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:51.053940', 'step': 14090, 'epoch': 2} {'type': 'loss', 'content': 0.13200631737709045, 'timestamp': '2025-09-10 02:52:51.056150', 'step': 14091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:51.110318', 'step': 14091, 'epoch': 2} {'type': 'loss', 'content': 0.08851367235183716, 'timestamp': '2025-09-10 02:52:51.116562', 'step': 14092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:51.170051', 'step': 14092, 'epoch': 2} {'type': 'loss', 'content': 0.08768972754478455, 'timestamp': '2025-09-10 02:52:51.172011', 'step': 14093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:51.226681', 'step': 14093, 'epoch': 2} {'type': 'loss', 'content': 0.08787331730127335, 'timestamp': '2025-09-10 02:52:51.228863', 'step': 14094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:51.283277', 'step': 14094, 'epoch': 2} {'type': 'loss', 'content': 0.1534816026687622, 'timestamp': '2025-09-10 02:52:51.285645', 'step': 14095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:51.339086', 'step': 14095, 'epoch': 2} {'type': 'loss', 'content': 0.1022782027721405, 'timestamp': '2025-09-10 02:52:51.345074', 'step': 14096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:51.399668', 'step': 14096, 'epoch': 2} {'type': 'loss', 'content': 0.029616303741931915, 'timestamp': '2025-09-10 02:52:51.402104', 'step': 14097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:51.455880', 'step': 14097, 'epoch': 2} {'type': 'loss', 'content': 0.09379010647535324, 'timestamp': '2025-09-10 02:52:51.458274', 'step': 14098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:51.512012', 'step': 14098, 'epoch': 2} {'type': 'loss', 'content': 0.10714266449213028, 'timestamp': '2025-09-10 02:52:51.514722', 'step': 14099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:51.568580', 'step': 14099, 'epoch': 2} {'type': 'loss', 'content': 0.14330647885799408, 'timestamp': '2025-09-10 02:52:51.574832', 'step': 14100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:51.628062', 'step': 14100, 'epoch': 2} {'type': 'loss', 'content': 0.201345756649971, 'timestamp': '2025-09-10 02:52:51.630568', 'step': 14101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:51.683988', 'step': 14101, 'epoch': 2} {'type': 'loss', 'content': 0.07358495891094208, 'timestamp': '2025-09-10 02:52:51.686310', 'step': 14102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:51.740808', 'step': 14102, 'epoch': 2} {'type': 'loss', 'content': 0.16929909586906433, 'timestamp': '2025-09-10 02:52:51.743190', 'step': 14103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:51.797501', 'step': 14103, 'epoch': 2} {'type': 'loss', 'content': 0.13115745782852173, 'timestamp': '2025-09-10 02:52:51.803609', 'step': 14104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:51.856945', 'step': 14104, 'epoch': 2} {'type': 'loss', 'content': 0.07337181270122528, 'timestamp': '2025-09-10 02:52:51.859238', 'step': 14105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:51.914856', 'step': 14105, 'epoch': 2} {'type': 'loss', 'content': 0.09013421088457108, 'timestamp': '2025-09-10 02:52:51.917152', 'step': 14106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:51.970641', 'step': 14106, 'epoch': 2} {'type': 'loss', 'content': 0.03853897005319595, 'timestamp': '2025-09-10 02:52:51.973159', 'step': 14107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:52.027524', 'step': 14107, 'epoch': 2} {'type': 'loss', 'content': 0.09409360587596893, 'timestamp': '2025-09-10 02:52:52.033461', 'step': 14108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:52.087091', 'step': 14108, 'epoch': 2} {'type': 'loss', 'content': 0.19629424810409546, 'timestamp': '2025-09-10 02:52:52.089525', 'step': 14109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:52.145190', 'step': 14109, 'epoch': 2} {'type': 'loss', 'content': 0.13834220170974731, 'timestamp': '2025-09-10 02:52:52.147633', 'step': 14110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:52.201354', 'step': 14110, 'epoch': 2} {'type': 'loss', 'content': 0.07790807634592056, 'timestamp': '2025-09-10 02:52:52.203613', 'step': 14111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:52.258759', 'step': 14111, 'epoch': 2} {'type': 'loss', 'content': 0.1307794153690338, 'timestamp': '2025-09-10 02:52:52.264865', 'step': 14112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:52.318709', 'step': 14112, 'epoch': 2} {'type': 'loss', 'content': 0.1351812481880188, 'timestamp': '2025-09-10 02:52:52.321048', 'step': 14113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:52.375205', 'step': 14113, 'epoch': 2} {'type': 'loss', 'content': 0.07871370017528534, 'timestamp': '2025-09-10 02:52:52.377809', 'step': 14114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:52.432651', 'step': 14114, 'epoch': 2} {'type': 'loss', 'content': 0.14041945338249207, 'timestamp': '2025-09-10 02:52:52.434950', 'step': 14115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:52.488741', 'step': 14115, 'epoch': 2} {'type': 'loss', 'content': 0.08476226031780243, 'timestamp': '2025-09-10 02:52:52.494645', 'step': 14116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:52.551878', 'step': 14116, 'epoch': 2} {'type': 'loss', 'content': 0.08297504484653473, 'timestamp': '2025-09-10 02:52:52.554021', 'step': 14117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:52.607570', 'step': 14117, 'epoch': 2} {'type': 'loss', 'content': 0.20770780742168427, 'timestamp': '2025-09-10 02:52:52.609829', 'step': 14118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:52.665635', 'step': 14118, 'epoch': 2} {'type': 'loss', 'content': 0.07372226566076279, 'timestamp': '2025-09-10 02:52:52.667869', 'step': 14119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:52.721783', 'step': 14119, 'epoch': 2} {'type': 'loss', 'content': 0.19322910904884338, 'timestamp': '2025-09-10 02:52:52.727762', 'step': 14120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:52.780888', 'step': 14120, 'epoch': 2} {'type': 'loss', 'content': 0.03366854041814804, 'timestamp': '2025-09-10 02:52:52.783165', 'step': 14121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:52.837069', 'step': 14121, 'epoch': 2} {'type': 'loss', 'content': 0.06154567375779152, 'timestamp': '2025-09-10 02:52:52.839452', 'step': 14122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:52.893303', 'step': 14122, 'epoch': 2} {'type': 'loss', 'content': 0.07760818302631378, 'timestamp': '2025-09-10 02:52:52.895603', 'step': 14123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:52.949501', 'step': 14123, 'epoch': 2} {'type': 'loss', 'content': 0.11533281952142715, 'timestamp': '2025-09-10 02:52:52.955616', 'step': 14124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:53.008851', 'step': 14124, 'epoch': 2} {'type': 'loss', 'content': 0.055098481476306915, 'timestamp': '2025-09-10 02:52:53.011215', 'step': 14125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:53.064059', 'step': 14125, 'epoch': 2} {'type': 'loss', 'content': 0.06150941178202629, 'timestamp': '2025-09-10 02:52:53.066464', 'step': 14126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:53.120074', 'step': 14126, 'epoch': 2} {'type': 'loss', 'content': 0.28990083932876587, 'timestamp': '2025-09-10 02:52:53.122031', 'step': 14127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:53.175708', 'step': 14127, 'epoch': 2} {'type': 'loss', 'content': 0.10708478093147278, 'timestamp': '2025-09-10 02:52:53.181748', 'step': 14128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:53.235012', 'step': 14128, 'epoch': 2} {'type': 'loss', 'content': 0.1058460921049118, 'timestamp': '2025-09-10 02:52:53.237449', 'step': 14129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:53.292069', 'step': 14129, 'epoch': 2} {'type': 'loss', 'content': 0.10815775394439697, 'timestamp': '2025-09-10 02:52:53.294561', 'step': 14130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:53.348138', 'step': 14130, 'epoch': 2} {'type': 'loss', 'content': 0.01416438166052103, 'timestamp': '2025-09-10 02:52:53.350478', 'step': 14131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:53.404902', 'step': 14131, 'epoch': 2} {'type': 'loss', 'content': 0.09314935654401779, 'timestamp': '2025-09-10 02:52:53.411158', 'step': 14132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:53.464940', 'step': 14132, 'epoch': 2} {'type': 'loss', 'content': 0.11512765288352966, 'timestamp': '2025-09-10 02:52:53.467289', 'step': 14133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:53.521273', 'step': 14133, 'epoch': 2} {'type': 'loss', 'content': 0.17039987444877625, 'timestamp': '2025-09-10 02:52:53.523534', 'step': 14134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:53.576987', 'step': 14134, 'epoch': 2} {'type': 'loss', 'content': 0.16225877404212952, 'timestamp': '2025-09-10 02:52:53.579293', 'step': 14135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:53.633889', 'step': 14135, 'epoch': 2} {'type': 'loss', 'content': 0.08596982806921005, 'timestamp': '2025-09-10 02:52:53.640123', 'step': 14136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:53.694353', 'step': 14136, 'epoch': 2} {'type': 'loss', 'content': 0.06189964711666107, 'timestamp': '2025-09-10 02:52:53.696664', 'step': 14137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:53.753424', 'step': 14137, 'epoch': 2} {'type': 'loss', 'content': 0.11829163134098053, 'timestamp': '2025-09-10 02:52:53.755713', 'step': 14138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:53.810140', 'step': 14138, 'epoch': 2} {'type': 'loss', 'content': 0.19311097264289856, 'timestamp': '2025-09-10 02:52:53.812418', 'step': 14139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:53.867666', 'step': 14139, 'epoch': 2} {'type': 'loss', 'content': 0.11611349135637283, 'timestamp': '2025-09-10 02:52:53.875310', 'step': 14140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:53.928578', 'step': 14140, 'epoch': 2} {'type': 'loss', 'content': 0.08083327114582062, 'timestamp': '2025-09-10 02:52:53.930821', 'step': 14141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:53.985334', 'step': 14141, 'epoch': 2} {'type': 'loss', 'content': 0.11359649896621704, 'timestamp': '2025-09-10 02:52:53.987669', 'step': 14142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:54.042173', 'step': 14142, 'epoch': 2} {'type': 'loss', 'content': 0.17303428053855896, 'timestamp': '2025-09-10 02:52:54.044496', 'step': 14143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:54.098232', 'step': 14143, 'epoch': 2} {'type': 'loss', 'content': 0.14997345209121704, 'timestamp': '2025-09-10 02:52:54.104941', 'step': 14144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:54.159396', 'step': 14144, 'epoch': 2} {'type': 'loss', 'content': 0.15514019131660461, 'timestamp': '2025-09-10 02:52:54.162144', 'step': 14145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:54.218265', 'step': 14145, 'epoch': 2} {'type': 'loss', 'content': 0.07502139359712601, 'timestamp': '2025-09-10 02:52:54.220494', 'step': 14146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:54.275616', 'step': 14146, 'epoch': 2} {'type': 'loss', 'content': 0.0770886167883873, 'timestamp': '2025-09-10 02:52:54.277770', 'step': 14147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:54.331516', 'step': 14147, 'epoch': 2} {'type': 'loss', 'content': 0.11762423068284988, 'timestamp': '2025-09-10 02:52:54.337521', 'step': 14148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:54.390984', 'step': 14148, 'epoch': 2} {'type': 'loss', 'content': 0.20215152204036713, 'timestamp': '2025-09-10 02:52:54.393153', 'step': 14149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:54.447200', 'step': 14149, 'epoch': 2} {'type': 'loss', 'content': 0.19766201078891754, 'timestamp': '2025-09-10 02:52:54.449395', 'step': 14150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:54.503195', 'step': 14150, 'epoch': 2} {'type': 'loss', 'content': 0.12824000418186188, 'timestamp': '2025-09-10 02:52:54.505507', 'step': 14151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:54.558837', 'step': 14151, 'epoch': 2} {'type': 'loss', 'content': 0.09245651215314865, 'timestamp': '2025-09-10 02:52:54.565238', 'step': 14152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:54.618605', 'step': 14152, 'epoch': 2} {'type': 'loss', 'content': 0.1449359506368637, 'timestamp': '2025-09-10 02:52:54.620732', 'step': 14153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:54.674381', 'step': 14153, 'epoch': 2} {'type': 'loss', 'content': 0.08236216753721237, 'timestamp': '2025-09-10 02:52:54.676651', 'step': 14154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:54.730691', 'step': 14154, 'epoch': 2} {'type': 'loss', 'content': 0.09887963533401489, 'timestamp': '2025-09-10 02:52:54.732818', 'step': 14155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:54.787521', 'step': 14155, 'epoch': 2} {'type': 'loss', 'content': 0.09740215539932251, 'timestamp': '2025-09-10 02:52:54.793602', 'step': 14156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:54.849134', 'step': 14156, 'epoch': 2} {'type': 'loss', 'content': 0.14543893933296204, 'timestamp': '2025-09-10 02:52:54.851314', 'step': 14157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:54.905999', 'step': 14157, 'epoch': 2} {'type': 'loss', 'content': 0.1495393067598343, 'timestamp': '2025-09-10 02:52:54.908424', 'step': 14158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:54.961977', 'step': 14158, 'epoch': 2} {'type': 'loss', 'content': 0.128851056098938, 'timestamp': '2025-09-10 02:52:54.964275', 'step': 14159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:55.018020', 'step': 14159, 'epoch': 2} {'type': 'loss', 'content': 0.07199382036924362, 'timestamp': '2025-09-10 02:52:55.023804', 'step': 14160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:55.076529', 'step': 14160, 'epoch': 2} {'type': 'loss', 'content': 0.14893478155136108, 'timestamp': '2025-09-10 02:52:55.078638', 'step': 14161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:55.131338', 'step': 14161, 'epoch': 2} {'type': 'loss', 'content': 0.1482521891593933, 'timestamp': '2025-09-10 02:52:55.133472', 'step': 14162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:55.187567', 'step': 14162, 'epoch': 2} {'type': 'loss', 'content': 0.08979707956314087, 'timestamp': '2025-09-10 02:52:55.189742', 'step': 14163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:55.243986', 'step': 14163, 'epoch': 2} {'type': 'loss', 'content': 0.08778822422027588, 'timestamp': '2025-09-10 02:52:55.249843', 'step': 14164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:55.302844', 'step': 14164, 'epoch': 2} {'type': 'loss', 'content': 0.18003401160240173, 'timestamp': '2025-09-10 02:52:55.305156', 'step': 14165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:55.358930', 'step': 14165, 'epoch': 2} {'type': 'loss', 'content': 0.10276351124048233, 'timestamp': '2025-09-10 02:52:55.361095', 'step': 14166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:55.414581', 'step': 14166, 'epoch': 2} {'type': 'loss', 'content': 0.13206523656845093, 'timestamp': '2025-09-10 02:52:55.416964', 'step': 14167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:55.470716', 'step': 14167, 'epoch': 2} {'type': 'loss', 'content': 0.1458076685667038, 'timestamp': '2025-09-10 02:52:55.476505', 'step': 14168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:55.538082', 'step': 14168, 'epoch': 2} {'type': 'loss', 'content': 0.04231515899300575, 'timestamp': '2025-09-10 02:52:55.540074', 'step': 14169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:55.593435', 'step': 14169, 'epoch': 2} {'type': 'loss', 'content': 0.17629238963127136, 'timestamp': '2025-09-10 02:52:55.595493', 'step': 14170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:55.648895', 'step': 14170, 'epoch': 2} {'type': 'loss', 'content': 0.20430827140808105, 'timestamp': '2025-09-10 02:52:55.651118', 'step': 14171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:55.704912', 'step': 14171, 'epoch': 2} {'type': 'loss', 'content': 0.16163764894008636, 'timestamp': '2025-09-10 02:52:55.710908', 'step': 14172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:55.764984', 'step': 14172, 'epoch': 2} {'type': 'loss', 'content': 0.07880675792694092, 'timestamp': '2025-09-10 02:52:55.767405', 'step': 14173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:55.820362', 'step': 14173, 'epoch': 2} {'type': 'loss', 'content': 0.12763474881649017, 'timestamp': '2025-09-10 02:52:55.822407', 'step': 14174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:55.875969', 'step': 14174, 'epoch': 2} {'type': 'loss', 'content': 0.08085447549819946, 'timestamp': '2025-09-10 02:52:55.878087', 'step': 14175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:55.931395', 'step': 14175, 'epoch': 2} {'type': 'loss', 'content': 0.1433870792388916, 'timestamp': '2025-09-10 02:52:55.937252', 'step': 14176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:55.990332', 'step': 14176, 'epoch': 2} {'type': 'loss', 'content': 0.1336696296930313, 'timestamp': '2025-09-10 02:52:55.992571', 'step': 14177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:56.045586', 'step': 14177, 'epoch': 2} {'type': 'loss', 'content': 0.08121421933174133, 'timestamp': '2025-09-10 02:52:56.047573', 'step': 14178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:56.100493', 'step': 14178, 'epoch': 2} {'type': 'loss', 'content': 0.132346048951149, 'timestamp': '2025-09-10 02:52:56.102584', 'step': 14179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:56.155386', 'step': 14179, 'epoch': 2} {'type': 'loss', 'content': 0.10326240956783295, 'timestamp': '2025-09-10 02:52:56.161227', 'step': 14180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:56.214405', 'step': 14180, 'epoch': 2} {'type': 'loss', 'content': 0.09756119549274445, 'timestamp': '2025-09-10 02:52:56.216626', 'step': 14181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:56.270694', 'step': 14181, 'epoch': 2} {'type': 'loss', 'content': 0.22221072018146515, 'timestamp': '2025-09-10 02:52:56.272849', 'step': 14182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:56.326517', 'step': 14182, 'epoch': 2} {'type': 'loss', 'content': 0.1514967530965805, 'timestamp': '2025-09-10 02:52:56.328577', 'step': 14183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:56.381766', 'step': 14183, 'epoch': 2} {'type': 'loss', 'content': 0.06578560918569565, 'timestamp': '2025-09-10 02:52:56.387871', 'step': 14184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:56.440504', 'step': 14184, 'epoch': 2} {'type': 'loss', 'content': 0.07172004878520966, 'timestamp': '2025-09-10 02:52:56.442572', 'step': 14185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:56.495990', 'step': 14185, 'epoch': 2} {'type': 'loss', 'content': 0.06616802513599396, 'timestamp': '2025-09-10 02:52:56.498115', 'step': 14186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:56.552024', 'step': 14186, 'epoch': 2} {'type': 'loss', 'content': 0.08273985981941223, 'timestamp': '2025-09-10 02:52:56.554236', 'step': 14187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:56.607133', 'step': 14187, 'epoch': 2} {'type': 'loss', 'content': 0.06223021447658539, 'timestamp': '2025-09-10 02:52:56.612982', 'step': 14188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:56.665390', 'step': 14188, 'epoch': 2} {'type': 'loss', 'content': 0.11001850664615631, 'timestamp': '2025-09-10 02:52:56.667376', 'step': 14189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:56.720382', 'step': 14189, 'epoch': 2} {'type': 'loss', 'content': 0.17254896461963654, 'timestamp': '2025-09-10 02:52:56.722632', 'step': 14190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:56.780281', 'step': 14190, 'epoch': 2} {'type': 'loss', 'content': 0.19710272550582886, 'timestamp': '2025-09-10 02:52:56.782365', 'step': 14191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:56.839858', 'step': 14191, 'epoch': 2} {'type': 'loss', 'content': 0.1699775904417038, 'timestamp': '2025-09-10 02:52:56.846357', 'step': 14192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:56.902305', 'step': 14192, 'epoch': 2} {'type': 'loss', 'content': 0.09867037832736969, 'timestamp': '2025-09-10 02:52:56.904467', 'step': 14193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:56.959932', 'step': 14193, 'epoch': 2} {'type': 'loss', 'content': 0.11350487917661667, 'timestamp': '2025-09-10 02:52:56.961997', 'step': 14194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:57.017123', 'step': 14194, 'epoch': 2} {'type': 'loss', 'content': 0.03885216638445854, 'timestamp': '2025-09-10 02:52:57.019180', 'step': 14195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:57.074506', 'step': 14195, 'epoch': 2} {'type': 'loss', 'content': 0.10364212840795517, 'timestamp': '2025-09-10 02:52:57.080523', 'step': 14196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:57.133811', 'step': 14196, 'epoch': 2} {'type': 'loss', 'content': 0.14849016070365906, 'timestamp': '2025-09-10 02:52:57.136408', 'step': 14197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:57.189103', 'step': 14197, 'epoch': 2} {'type': 'loss', 'content': 0.13025107979774475, 'timestamp': '2025-09-10 02:52:57.191353', 'step': 14198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:57.244799', 'step': 14198, 'epoch': 2} {'type': 'loss', 'content': 0.1295624077320099, 'timestamp': '2025-09-10 02:52:57.247045', 'step': 14199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:57.300906', 'step': 14199, 'epoch': 2} {'type': 'loss', 'content': 0.11277037858963013, 'timestamp': '2025-09-10 02:52:57.306974', 'step': 14200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:57.360778', 'step': 14200, 'epoch': 2} {'type': 'loss', 'content': 0.13425883650779724, 'timestamp': '2025-09-10 02:52:57.363194', 'step': 14201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:57.417446', 'step': 14201, 'epoch': 2} {'type': 'loss', 'content': 0.09486771374940872, 'timestamp': '2025-09-10 02:52:57.419871', 'step': 14202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:57.475548', 'step': 14202, 'epoch': 2} {'type': 'loss', 'content': 0.10134968161582947, 'timestamp': '2025-09-10 02:52:57.477642', 'step': 14203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:57.532230', 'step': 14203, 'epoch': 2} {'type': 'loss', 'content': 0.11109588295221329, 'timestamp': '2025-09-10 02:52:57.538431', 'step': 14204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:57.591163', 'step': 14204, 'epoch': 2} {'type': 'loss', 'content': 0.09787577390670776, 'timestamp': '2025-09-10 02:52:57.593394', 'step': 14205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:57.647012', 'step': 14205, 'epoch': 2} {'type': 'loss', 'content': 0.11265834420919418, 'timestamp': '2025-09-10 02:52:57.649197', 'step': 14206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:57.702954', 'step': 14206, 'epoch': 2} {'type': 'loss', 'content': 0.14406807720661163, 'timestamp': '2025-09-10 02:52:57.705029', 'step': 14207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:57.758803', 'step': 14207, 'epoch': 2} {'type': 'loss', 'content': 0.12394049763679504, 'timestamp': '2025-09-10 02:52:57.765001', 'step': 14208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:57.820790', 'step': 14208, 'epoch': 2} {'type': 'loss', 'content': 0.04873046651482582, 'timestamp': '2025-09-10 02:52:57.822997', 'step': 14209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:57.877853', 'step': 14209, 'epoch': 2} {'type': 'loss', 'content': 0.18280881643295288, 'timestamp': '2025-09-10 02:52:57.879944', 'step': 14210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:57.935822', 'step': 14210, 'epoch': 2} {'type': 'loss', 'content': 0.07611411064863205, 'timestamp': '2025-09-10 02:52:57.938033', 'step': 14211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:57.992050', 'step': 14211, 'epoch': 2} {'type': 'loss', 'content': 0.1711026132106781, 'timestamp': '2025-09-10 02:52:57.998315', 'step': 14212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:58.050415', 'step': 14212, 'epoch': 2} {'type': 'loss', 'content': 0.14720842242240906, 'timestamp': '2025-09-10 02:52:58.052605', 'step': 14213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:58.107282', 'step': 14213, 'epoch': 2} {'type': 'loss', 'content': 0.0730256661772728, 'timestamp': '2025-09-10 02:52:58.109558', 'step': 14214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:58.163113', 'step': 14214, 'epoch': 2} {'type': 'loss', 'content': 0.14129357039928436, 'timestamp': '2025-09-10 02:52:58.165529', 'step': 14215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:52:58.218512', 'step': 14215, 'epoch': 2} {'type': 'loss', 'content': 0.12754355370998383, 'timestamp': '2025-09-10 02:52:58.224474', 'step': 14216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:58.278759', 'step': 14216, 'epoch': 2} {'type': 'loss', 'content': 0.10729886591434479, 'timestamp': '2025-09-10 02:52:58.281062', 'step': 14217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:58.335501', 'step': 14217, 'epoch': 2} {'type': 'loss', 'content': 0.03376268595457077, 'timestamp': '2025-09-10 02:52:58.337794', 'step': 14218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:58.391764', 'step': 14218, 'epoch': 2} {'type': 'loss', 'content': 0.09832914173603058, 'timestamp': '2025-09-10 02:52:58.394113', 'step': 14219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:58.448184', 'step': 14219, 'epoch': 2} {'type': 'loss', 'content': 0.11030358076095581, 'timestamp': '2025-09-10 02:52:58.454397', 'step': 14220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:58.507657', 'step': 14220, 'epoch': 2} {'type': 'loss', 'content': 0.13063855469226837, 'timestamp': '2025-09-10 02:52:58.509854', 'step': 14221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:58.563900', 'step': 14221, 'epoch': 2} {'type': 'loss', 'content': 0.1245797649025917, 'timestamp': '2025-09-10 02:52:58.565937', 'step': 14222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:58.619742', 'step': 14222, 'epoch': 2} {'type': 'loss', 'content': 0.06975828111171722, 'timestamp': '2025-09-10 02:52:58.621913', 'step': 14223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:58.676140', 'step': 14223, 'epoch': 2} {'type': 'loss', 'content': 0.06416618824005127, 'timestamp': '2025-09-10 02:52:58.681927', 'step': 14224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:58.734814', 'step': 14224, 'epoch': 2} {'type': 'loss', 'content': 0.1430487334728241, 'timestamp': '2025-09-10 02:52:58.736835', 'step': 14225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:58.789686', 'step': 14225, 'epoch': 2} {'type': 'loss', 'content': 0.12188487499952316, 'timestamp': '2025-09-10 02:52:58.791736', 'step': 14226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:58.845602', 'step': 14226, 'epoch': 2} {'type': 'loss', 'content': 0.09643317759037018, 'timestamp': '2025-09-10 02:52:58.847748', 'step': 14227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:58.900774', 'step': 14227, 'epoch': 2} {'type': 'loss', 'content': 0.17793092131614685, 'timestamp': '2025-09-10 02:52:58.906395', 'step': 14228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:58.959375', 'step': 14228, 'epoch': 2} {'type': 'loss', 'content': 0.03729378432035446, 'timestamp': '2025-09-10 02:52:58.961727', 'step': 14229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:59.015244', 'step': 14229, 'epoch': 2} {'type': 'loss', 'content': 0.2014862298965454, 'timestamp': '2025-09-10 02:52:59.017623', 'step': 14230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:59.071322', 'step': 14230, 'epoch': 2} {'type': 'loss', 'content': 0.07664530724287033, 'timestamp': '2025-09-10 02:52:59.073491', 'step': 14231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:59.126712', 'step': 14231, 'epoch': 2} {'type': 'loss', 'content': 0.1683202087879181, 'timestamp': '2025-09-10 02:52:59.132495', 'step': 14232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:59.185505', 'step': 14232, 'epoch': 2} {'type': 'loss', 'content': 0.054418813437223434, 'timestamp': '2025-09-10 02:52:59.187589', 'step': 14233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:59.240502', 'step': 14233, 'epoch': 2} {'type': 'loss', 'content': 0.156826913356781, 'timestamp': '2025-09-10 02:52:59.242557', 'step': 14234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:59.295546', 'step': 14234, 'epoch': 2} {'type': 'loss', 'content': 0.03305890038609505, 'timestamp': '2025-09-10 02:52:59.297682', 'step': 14235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:59.350783', 'step': 14235, 'epoch': 2} {'type': 'loss', 'content': 0.08502019941806793, 'timestamp': '2025-09-10 02:52:59.356431', 'step': 14236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:59.409589', 'step': 14236, 'epoch': 2} {'type': 'loss', 'content': 0.08242570608854294, 'timestamp': '2025-09-10 02:52:59.411666', 'step': 14237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:59.464693', 'step': 14237, 'epoch': 2} {'type': 'loss', 'content': 0.13452377915382385, 'timestamp': '2025-09-10 02:52:59.466986', 'step': 14238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:59.520773', 'step': 14238, 'epoch': 2} {'type': 'loss', 'content': 0.08618053048849106, 'timestamp': '2025-09-10 02:52:59.522930', 'step': 14239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:59.578031', 'step': 14239, 'epoch': 2} {'type': 'loss', 'content': 0.12410683184862137, 'timestamp': '2025-09-10 02:52:59.584076', 'step': 14240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:59.636950', 'step': 14240, 'epoch': 2} {'type': 'loss', 'content': 0.045475106686353683, 'timestamp': '2025-09-10 02:52:59.639107', 'step': 14241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:59.692615', 'step': 14241, 'epoch': 2} {'type': 'loss', 'content': 0.05672628805041313, 'timestamp': '2025-09-10 02:52:59.694668', 'step': 14242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:59.747614', 'step': 14242, 'epoch': 2} {'type': 'loss', 'content': 0.18740563094615936, 'timestamp': '2025-09-10 02:52:59.749994', 'step': 14243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:52:59.803621', 'step': 14243, 'epoch': 2} {'type': 'loss', 'content': 0.10551955550909042, 'timestamp': '2025-09-10 02:52:59.809490', 'step': 14244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:59.862653', 'step': 14244, 'epoch': 2} {'type': 'loss', 'content': 0.1475047916173935, 'timestamp': '2025-09-10 02:52:59.864716', 'step': 14245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:52:59.917498', 'step': 14245, 'epoch': 2} {'type': 'loss', 'content': 0.052253011614084244, 'timestamp': '2025-09-10 02:52:59.919724', 'step': 14246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:52:59.972290', 'step': 14246, 'epoch': 2} {'type': 'loss', 'content': 0.21192798018455505, 'timestamp': '2025-09-10 02:52:59.974345', 'step': 14247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:00.030577', 'step': 14247, 'epoch': 2} {'type': 'loss', 'content': 0.08934126794338226, 'timestamp': '2025-09-10 02:53:00.036627', 'step': 14248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:00.089203', 'step': 14248, 'epoch': 2} {'type': 'loss', 'content': 0.11725497990846634, 'timestamp': '2025-09-10 02:53:00.091456', 'step': 14249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:00.145538', 'step': 14249, 'epoch': 2} {'type': 'loss', 'content': 0.13934287428855896, 'timestamp': '2025-09-10 02:53:00.150324', 'step': 14250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:00.210160', 'step': 14250, 'epoch': 2} {'type': 'loss', 'content': 0.11126051843166351, 'timestamp': '2025-09-10 02:53:00.212272', 'step': 14251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:00.265611', 'step': 14251, 'epoch': 2} {'type': 'loss', 'content': 0.09721366316080093, 'timestamp': '2025-09-10 02:53:00.271545', 'step': 14252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:00.325121', 'step': 14252, 'epoch': 2} {'type': 'loss', 'content': 0.09417037665843964, 'timestamp': '2025-09-10 02:53:00.327414', 'step': 14253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:53:00.380769', 'step': 14253, 'epoch': 2} {'type': 'loss', 'content': 0.1289382129907608, 'timestamp': '2025-09-10 02:53:00.382966', 'step': 14254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:00.436634', 'step': 14254, 'epoch': 2} {'type': 'loss', 'content': 0.058151621371507645, 'timestamp': '2025-09-10 02:53:00.438837', 'step': 14255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:00.492512', 'step': 14255, 'epoch': 2} {'type': 'loss', 'content': 0.11179843544960022, 'timestamp': '2025-09-10 02:53:00.498241', 'step': 14256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:00.566171', 'step': 14256, 'epoch': 2} {'type': 'loss', 'content': 0.13707761466503143, 'timestamp': '2025-09-10 02:53:00.568197', 'step': 14257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:00.623022', 'step': 14257, 'epoch': 2} {'type': 'loss', 'content': 0.16060857474803925, 'timestamp': '2025-09-10 02:53:00.625213', 'step': 14258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:00.680074', 'step': 14258, 'epoch': 2} {'type': 'loss', 'content': 0.0900932028889656, 'timestamp': '2025-09-10 02:53:00.682504', 'step': 14259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:00.735961', 'step': 14259, 'epoch': 2} {'type': 'loss', 'content': 0.20882727205753326, 'timestamp': '2025-09-10 02:53:00.741847', 'step': 14260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:00.794938', 'step': 14260, 'epoch': 2} {'type': 'loss', 'content': 0.12301777303218842, 'timestamp': '2025-09-10 02:53:00.797083', 'step': 14261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:00.850999', 'step': 14261, 'epoch': 2} {'type': 'loss', 'content': 0.15935222804546356, 'timestamp': '2025-09-10 02:53:00.853177', 'step': 14262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:00.906443', 'step': 14262, 'epoch': 2} {'type': 'loss', 'content': 0.16737914085388184, 'timestamp': '2025-09-10 02:53:00.908678', 'step': 14263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:00.961530', 'step': 14263, 'epoch': 2} {'type': 'loss', 'content': 0.07121080160140991, 'timestamp': '2025-09-10 02:53:00.972293', 'step': 14264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:01.027049', 'step': 14264, 'epoch': 2} {'type': 'loss', 'content': 0.1517745852470398, 'timestamp': '2025-09-10 02:53:01.029166', 'step': 14265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:01.082093', 'step': 14265, 'epoch': 2} {'type': 'loss', 'content': 0.10650583356618881, 'timestamp': '2025-09-10 02:53:01.085578', 'step': 14266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:01.140705', 'step': 14266, 'epoch': 2} {'type': 'loss', 'content': 0.05473535507917404, 'timestamp': '2025-09-10 02:53:01.147098', 'step': 14267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:01.204846', 'step': 14267, 'epoch': 2} {'type': 'loss', 'content': 0.11094330996274948, 'timestamp': '2025-09-10 02:53:01.210802', 'step': 14268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:01.267700', 'step': 14268, 'epoch': 2} {'type': 'loss', 'content': 0.12250323593616486, 'timestamp': '2025-09-10 02:53:01.269862', 'step': 14269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:01.323434', 'step': 14269, 'epoch': 2} {'type': 'loss', 'content': 0.13312362134456635, 'timestamp': '2025-09-10 02:53:01.325788', 'step': 14270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:01.384014', 'step': 14270, 'epoch': 2} {'type': 'loss', 'content': 0.10238911956548691, 'timestamp': '2025-09-10 02:53:01.387265', 'step': 14271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:01.442237', 'step': 14271, 'epoch': 2} {'type': 'loss', 'content': 0.14637461304664612, 'timestamp': '2025-09-10 02:53:01.448320', 'step': 14272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:01.502808', 'step': 14272, 'epoch': 2} {'type': 'loss', 'content': 0.08863640576601028, 'timestamp': '2025-09-10 02:53:01.506792', 'step': 14273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:01.563663', 'step': 14273, 'epoch': 2} {'type': 'loss', 'content': 0.06791292130947113, 'timestamp': '2025-09-10 02:53:01.565870', 'step': 14274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:01.624708', 'step': 14274, 'epoch': 2} {'type': 'loss', 'content': 0.08928348124027252, 'timestamp': '2025-09-10 02:53:01.627624', 'step': 14275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:01.685527', 'step': 14275, 'epoch': 2} {'type': 'loss', 'content': 0.0654243752360344, 'timestamp': '2025-09-10 02:53:01.691647', 'step': 14276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:01.746116', 'step': 14276, 'epoch': 2} {'type': 'loss', 'content': 0.11053064465522766, 'timestamp': '2025-09-10 02:53:01.748466', 'step': 14277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:01.805059', 'step': 14277, 'epoch': 2} {'type': 'loss', 'content': 0.12781532108783722, 'timestamp': '2025-09-10 02:53:01.807332', 'step': 14278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:01.863341', 'step': 14278, 'epoch': 2} {'type': 'loss', 'content': 0.14403744041919708, 'timestamp': '2025-09-10 02:53:01.865741', 'step': 14279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:01.920753', 'step': 14279, 'epoch': 2} {'type': 'loss', 'content': 0.088162362575531, 'timestamp': '2025-09-10 02:53:01.926823', 'step': 14280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:01.980158', 'step': 14280, 'epoch': 2} {'type': 'loss', 'content': 0.100352443754673, 'timestamp': '2025-09-10 02:53:01.982296', 'step': 14281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:02.036160', 'step': 14281, 'epoch': 2} {'type': 'loss', 'content': 0.08629018068313599, 'timestamp': '2025-09-10 02:53:02.038474', 'step': 14282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:02.093550', 'step': 14282, 'epoch': 2} {'type': 'loss', 'content': 0.0791839137673378, 'timestamp': '2025-09-10 02:53:02.095870', 'step': 14283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:02.150793', 'step': 14283, 'epoch': 2} {'type': 'loss', 'content': 0.15285183489322662, 'timestamp': '2025-09-10 02:53:02.156675', 'step': 14284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:02.210018', 'step': 14284, 'epoch': 2} {'type': 'loss', 'content': 0.12005244195461273, 'timestamp': '2025-09-10 02:53:02.212180', 'step': 14285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:02.265917', 'step': 14285, 'epoch': 2} {'type': 'loss', 'content': 0.17001047730445862, 'timestamp': '2025-09-10 02:53:02.268273', 'step': 14286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:02.322204', 'step': 14286, 'epoch': 2} {'type': 'loss', 'content': 0.18269769847393036, 'timestamp': '2025-09-10 02:53:02.324480', 'step': 14287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:02.380797', 'step': 14287, 'epoch': 2} {'type': 'loss', 'content': 0.0742708295583725, 'timestamp': '2025-09-10 02:53:02.386683', 'step': 14288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:02.440786', 'step': 14288, 'epoch': 2} {'type': 'loss', 'content': 0.10686460137367249, 'timestamp': '2025-09-10 02:53:02.443027', 'step': 14289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:02.496449', 'step': 14289, 'epoch': 2} {'type': 'loss', 'content': 0.09010405838489532, 'timestamp': '2025-09-10 02:53:02.498592', 'step': 14290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:02.552032', 'step': 14290, 'epoch': 2} {'type': 'loss', 'content': 0.12624576687812805, 'timestamp': '2025-09-10 02:53:02.554115', 'step': 14291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:02.608972', 'step': 14291, 'epoch': 2} {'type': 'loss', 'content': 0.06048118695616722, 'timestamp': '2025-09-10 02:53:02.614877', 'step': 14292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:02.668389', 'step': 14292, 'epoch': 2} {'type': 'loss', 'content': 0.2065003216266632, 'timestamp': '2025-09-10 02:53:02.670472', 'step': 14293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:02.724651', 'step': 14293, 'epoch': 2} {'type': 'loss', 'content': 0.12635932862758636, 'timestamp': '2025-09-10 02:53:02.726720', 'step': 14294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:02.780540', 'step': 14294, 'epoch': 2} {'type': 'loss', 'content': 0.07635601609945297, 'timestamp': '2025-09-10 02:53:02.782660', 'step': 14295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:02.835983', 'step': 14295, 'epoch': 2} {'type': 'loss', 'content': 0.08557197451591492, 'timestamp': '2025-09-10 02:53:02.842021', 'step': 14296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:02.897007', 'step': 14296, 'epoch': 2} {'type': 'loss', 'content': 0.16489942371845245, 'timestamp': '2025-09-10 02:53:02.899394', 'step': 14297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:02.954195', 'step': 14297, 'epoch': 2} {'type': 'loss', 'content': 0.097986601293087, 'timestamp': '2025-09-10 02:53:02.956533', 'step': 14298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:03.010378', 'step': 14298, 'epoch': 2} {'type': 'loss', 'content': 0.13585877418518066, 'timestamp': '2025-09-10 02:53:03.012687', 'step': 14299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:03.066188', 'step': 14299, 'epoch': 2} {'type': 'loss', 'content': 0.08438139408826828, 'timestamp': '2025-09-10 02:53:03.072220', 'step': 14300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:03.125317', 'step': 14300, 'epoch': 2} {'type': 'loss', 'content': 0.10955478250980377, 'timestamp': '2025-09-10 02:53:03.127636', 'step': 14301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:03.180828', 'step': 14301, 'epoch': 2} {'type': 'loss', 'content': 0.0888618677854538, 'timestamp': '2025-09-10 02:53:03.182960', 'step': 14302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:03.238833', 'step': 14302, 'epoch': 2} {'type': 'loss', 'content': 0.11300142854452133, 'timestamp': '2025-09-10 02:53:03.240863', 'step': 14303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:03.294400', 'step': 14303, 'epoch': 2} {'type': 'loss', 'content': 0.10471811145544052, 'timestamp': '2025-09-10 02:53:03.300294', 'step': 14304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:03.353190', 'step': 14304, 'epoch': 2} {'type': 'loss', 'content': 0.13128690421581268, 'timestamp': '2025-09-10 02:53:03.355394', 'step': 14305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:03.408608', 'step': 14305, 'epoch': 2} {'type': 'loss', 'content': 0.0975688174366951, 'timestamp': '2025-09-10 02:53:03.410949', 'step': 14306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:03.465326', 'step': 14306, 'epoch': 2} {'type': 'loss', 'content': 0.07719609886407852, 'timestamp': '2025-09-10 02:53:03.467520', 'step': 14307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:03.521638', 'step': 14307, 'epoch': 2} {'type': 'loss', 'content': 0.10702583193778992, 'timestamp': '2025-09-10 02:53:03.527537', 'step': 14308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:03.581211', 'step': 14308, 'epoch': 2} {'type': 'loss', 'content': 0.12551257014274597, 'timestamp': '2025-09-10 02:53:03.583463', 'step': 14309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:03.637011', 'step': 14309, 'epoch': 2} {'type': 'loss', 'content': 0.04799880087375641, 'timestamp': '2025-09-10 02:53:03.639036', 'step': 14310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:03.693871', 'step': 14310, 'epoch': 2} {'type': 'loss', 'content': 0.08150524646043777, 'timestamp': '2025-09-10 02:53:03.696195', 'step': 14311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:03.749561', 'step': 14311, 'epoch': 2} {'type': 'loss', 'content': 0.055341240018606186, 'timestamp': '2025-09-10 02:53:03.755545', 'step': 14312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:03.808947', 'step': 14312, 'epoch': 2} {'type': 'loss', 'content': 0.0775524452328682, 'timestamp': '2025-09-10 02:53:03.811006', 'step': 14313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:03.864378', 'step': 14313, 'epoch': 2} {'type': 'loss', 'content': 0.19771941006183624, 'timestamp': '2025-09-10 02:53:03.866535', 'step': 14314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:03.919468', 'step': 14314, 'epoch': 2} {'type': 'loss', 'content': 0.1631355881690979, 'timestamp': '2025-09-10 02:53:03.921658', 'step': 14315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:03.974953', 'step': 14315, 'epoch': 2} {'type': 'loss', 'content': 0.15825632214546204, 'timestamp': '2025-09-10 02:53:03.980889', 'step': 14316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:04.033483', 'step': 14316, 'epoch': 2} {'type': 'loss', 'content': 0.03718903660774231, 'timestamp': '2025-09-10 02:53:04.035502', 'step': 14317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:04.089435', 'step': 14317, 'epoch': 2} {'type': 'loss', 'content': 0.10028016567230225, 'timestamp': '2025-09-10 02:53:04.091448', 'step': 14318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:53:04.144509', 'step': 14318, 'epoch': 2} {'type': 'loss', 'content': 0.0884137749671936, 'timestamp': '2025-09-10 02:53:04.146657', 'step': 14319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:04.200829', 'step': 14319, 'epoch': 2} {'type': 'loss', 'content': 0.0910821333527565, 'timestamp': '2025-09-10 02:53:04.206498', 'step': 14320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:04.260113', 'step': 14320, 'epoch': 2} {'type': 'loss', 'content': 0.10944033414125443, 'timestamp': '2025-09-10 02:53:04.262164', 'step': 14321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:04.317612', 'step': 14321, 'epoch': 2} {'type': 'loss', 'content': 0.11773320287466049, 'timestamp': '2025-09-10 02:53:04.319815', 'step': 14322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:04.373607', 'step': 14322, 'epoch': 2} {'type': 'loss', 'content': 0.2168698012828827, 'timestamp': '2025-09-10 02:53:04.375655', 'step': 14323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:04.428741', 'step': 14323, 'epoch': 2} {'type': 'loss', 'content': 0.156826913356781, 'timestamp': '2025-09-10 02:53:04.434710', 'step': 14324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:04.582331', 'step': 14324, 'epoch': 2} {'type': 'loss', 'content': 0.08208991587162018, 'timestamp': '2025-09-10 02:53:04.584422', 'step': 14325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:04.637109', 'step': 14325, 'epoch': 2} {'type': 'loss', 'content': 0.09247874468564987, 'timestamp': '2025-09-10 02:53:04.640691', 'step': 14326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:04.696783', 'step': 14326, 'epoch': 2} {'type': 'loss', 'content': 0.12202504277229309, 'timestamp': '2025-09-10 02:53:04.699097', 'step': 14327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:04.752664', 'step': 14327, 'epoch': 2} {'type': 'loss', 'content': 0.10264457762241364, 'timestamp': '2025-09-10 02:53:04.758580', 'step': 14328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:04.811564', 'step': 14328, 'epoch': 2} {'type': 'loss', 'content': 0.11022692918777466, 'timestamp': '2025-09-10 02:53:04.813994', 'step': 14329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:04.867655', 'step': 14329, 'epoch': 2} {'type': 'loss', 'content': 0.09582376480102539, 'timestamp': '2025-09-10 02:53:04.870605', 'step': 14330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:04.923884', 'step': 14330, 'epoch': 2} {'type': 'loss', 'content': 0.13887231051921844, 'timestamp': '2025-09-10 02:53:04.926182', 'step': 14331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:04.980900', 'step': 14331, 'epoch': 2} {'type': 'loss', 'content': 0.15772826969623566, 'timestamp': '2025-09-10 02:53:04.987049', 'step': 14332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:05.039869', 'step': 14332, 'epoch': 2} {'type': 'loss', 'content': 0.1360063999891281, 'timestamp': '2025-09-10 02:53:05.042133', 'step': 14333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:05.096195', 'step': 14333, 'epoch': 2} {'type': 'loss', 'content': 0.1366618424654007, 'timestamp': '2025-09-10 02:53:05.098468', 'step': 14334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:05.153797', 'step': 14334, 'epoch': 2} {'type': 'loss', 'content': 0.17984049022197723, 'timestamp': '2025-09-10 02:53:05.156127', 'step': 14335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:05.209624', 'step': 14335, 'epoch': 2} {'type': 'loss', 'content': 0.1047888994216919, 'timestamp': '2025-09-10 02:53:05.215600', 'step': 14336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:05.276071', 'step': 14336, 'epoch': 2} {'type': 'loss', 'content': 0.11047468334436417, 'timestamp': '2025-09-10 02:53:05.277974', 'step': 14337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:05.332476', 'step': 14337, 'epoch': 2} {'type': 'loss', 'content': 0.12433827668428421, 'timestamp': '2025-09-10 02:53:05.334836', 'step': 14338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:05.389111', 'step': 14338, 'epoch': 2} {'type': 'loss', 'content': 0.08990966528654099, 'timestamp': '2025-09-10 02:53:05.391428', 'step': 14339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:05.445385', 'step': 14339, 'epoch': 2} {'type': 'loss', 'content': 0.07315909117460251, 'timestamp': '2025-09-10 02:53:05.451304', 'step': 14340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:05.504502', 'step': 14340, 'epoch': 2} {'type': 'loss', 'content': 0.08445533365011215, 'timestamp': '2025-09-10 02:53:05.506780', 'step': 14341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:05.559672', 'step': 14341, 'epoch': 2} {'type': 'loss', 'content': 0.20673613250255585, 'timestamp': '2025-09-10 02:53:05.561818', 'step': 14342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:05.615750', 'step': 14342, 'epoch': 2} {'type': 'loss', 'content': 0.10296667367219925, 'timestamp': '2025-09-10 02:53:05.617980', 'step': 14343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:05.671513', 'step': 14343, 'epoch': 2} {'type': 'loss', 'content': 0.13789035379886627, 'timestamp': '2025-09-10 02:53:05.677503', 'step': 14344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:05.730263', 'step': 14344, 'epoch': 2} {'type': 'loss', 'content': 0.09367679804563522, 'timestamp': '2025-09-10 02:53:05.732798', 'step': 14345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:05.786033', 'step': 14345, 'epoch': 2} {'type': 'loss', 'content': 0.08505548536777496, 'timestamp': '2025-09-10 02:53:05.788245', 'step': 14346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:05.841380', 'step': 14346, 'epoch': 2} {'type': 'loss', 'content': 0.0814920961856842, 'timestamp': '2025-09-10 02:53:05.843671', 'step': 14347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:05.897337', 'step': 14347, 'epoch': 2} {'type': 'loss', 'content': 0.07788314670324326, 'timestamp': '2025-09-10 02:53:05.903269', 'step': 14348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:05.955667', 'step': 14348, 'epoch': 2} {'type': 'loss', 'content': 0.11197176575660706, 'timestamp': '2025-09-10 02:53:05.957973', 'step': 14349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:06.011541', 'step': 14349, 'epoch': 2} {'type': 'loss', 'content': 0.10013464093208313, 'timestamp': '2025-09-10 02:53:06.013804', 'step': 14350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:06.066944', 'step': 14350, 'epoch': 2} {'type': 'loss', 'content': 0.13005894422531128, 'timestamp': '2025-09-10 02:53:06.069139', 'step': 14351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:06.122812', 'step': 14351, 'epoch': 2} {'type': 'loss', 'content': 0.11598774045705795, 'timestamp': '2025-09-10 02:53:06.128663', 'step': 14352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:06.181407', 'step': 14352, 'epoch': 2} {'type': 'loss', 'content': 0.1834764927625656, 'timestamp': '2025-09-10 02:53:06.183710', 'step': 14353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:06.236650', 'step': 14353, 'epoch': 2} {'type': 'loss', 'content': 0.06968051940202713, 'timestamp': '2025-09-10 02:53:06.238905', 'step': 14354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:06.292343', 'step': 14354, 'epoch': 2} {'type': 'loss', 'content': 0.1134573295712471, 'timestamp': '2025-09-10 02:53:06.294462', 'step': 14355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:06.346872', 'step': 14355, 'epoch': 2} {'type': 'loss', 'content': 0.1149587631225586, 'timestamp': '2025-09-10 02:53:06.352715', 'step': 14356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:06.405805', 'step': 14356, 'epoch': 2} {'type': 'loss', 'content': 0.09616892784833908, 'timestamp': '2025-09-10 02:53:06.408139', 'step': 14357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:06.461459', 'step': 14357, 'epoch': 2} {'type': 'loss', 'content': 0.10214856266975403, 'timestamp': '2025-09-10 02:53:06.463956', 'step': 14358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:06.517535', 'step': 14358, 'epoch': 2} {'type': 'loss', 'content': 0.153480663895607, 'timestamp': '2025-09-10 02:53:06.520378', 'step': 14359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:06.573726', 'step': 14359, 'epoch': 2} {'type': 'loss', 'content': 0.11392556130886078, 'timestamp': '2025-09-10 02:53:06.579779', 'step': 14360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:53:06.632971', 'step': 14360, 'epoch': 2} {'type': 'loss', 'content': 0.09134510904550552, 'timestamp': '2025-09-10 02:53:06.635312', 'step': 14361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:06.690492', 'step': 14361, 'epoch': 2} {'type': 'loss', 'content': 0.21496033668518066, 'timestamp': '2025-09-10 02:53:06.692814', 'step': 14362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:06.746203', 'step': 14362, 'epoch': 2} {'type': 'loss', 'content': 0.1706119030714035, 'timestamp': '2025-09-10 02:53:06.748432', 'step': 14363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:06.805972', 'step': 14363, 'epoch': 2} {'type': 'loss', 'content': 0.08755023777484894, 'timestamp': '2025-09-10 02:53:06.811760', 'step': 14364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:06.864521', 'step': 14364, 'epoch': 2} {'type': 'loss', 'content': 0.15386487543582916, 'timestamp': '2025-09-10 02:53:06.866956', 'step': 14365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:06.920110', 'step': 14365, 'epoch': 2} {'type': 'loss', 'content': 0.19125810265541077, 'timestamp': '2025-09-10 02:53:06.922439', 'step': 14366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:06.978633', 'step': 14366, 'epoch': 2} {'type': 'loss', 'content': 0.11304226517677307, 'timestamp': '2025-09-10 02:53:06.980942', 'step': 14367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:07.035496', 'step': 14367, 'epoch': 2} {'type': 'loss', 'content': 0.08637683093547821, 'timestamp': '2025-09-10 02:53:07.041635', 'step': 14368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:07.094194', 'step': 14368, 'epoch': 2} {'type': 'loss', 'content': 0.09849397093057632, 'timestamp': '2025-09-10 02:53:07.096638', 'step': 14369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:07.149956', 'step': 14369, 'epoch': 2} {'type': 'loss', 'content': 0.1882951408624649, 'timestamp': '2025-09-10 02:53:07.152360', 'step': 14370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:07.207309', 'step': 14370, 'epoch': 2} {'type': 'loss', 'content': 0.10417469590902328, 'timestamp': '2025-09-10 02:53:07.209596', 'step': 14371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:07.262770', 'step': 14371, 'epoch': 2} {'type': 'loss', 'content': 0.09341596812009811, 'timestamp': '2025-09-10 02:53:07.268854', 'step': 14372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:07.321828', 'step': 14372, 'epoch': 2} {'type': 'loss', 'content': 0.08527964353561401, 'timestamp': '2025-09-10 02:53:07.324203', 'step': 14373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:07.377084', 'step': 14373, 'epoch': 2} {'type': 'loss', 'content': 0.08101311326026917, 'timestamp': '2025-09-10 02:53:07.379578', 'step': 14374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:07.432656', 'step': 14374, 'epoch': 2} {'type': 'loss', 'content': 0.1697290688753128, 'timestamp': '2025-09-10 02:53:07.435033', 'step': 14375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:07.488481', 'step': 14375, 'epoch': 2} {'type': 'loss', 'content': 0.10797655582427979, 'timestamp': '2025-09-10 02:53:07.495150', 'step': 14376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:07.548429', 'step': 14376, 'epoch': 2} {'type': 'loss', 'content': 0.07593968510627747, 'timestamp': '2025-09-10 02:53:07.550747', 'step': 14377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:07.604801', 'step': 14377, 'epoch': 2} {'type': 'loss', 'content': 0.058531295508146286, 'timestamp': '2025-09-10 02:53:07.607290', 'step': 14378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:53:07.661058', 'step': 14378, 'epoch': 2} {'type': 'loss', 'content': 0.14863039553165436, 'timestamp': '2025-09-10 02:53:07.663402', 'step': 14379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:07.717401', 'step': 14379, 'epoch': 2} {'type': 'loss', 'content': 0.0654718354344368, 'timestamp': '2025-09-10 02:53:07.723505', 'step': 14380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:07.776555', 'step': 14380, 'epoch': 2} {'type': 'loss', 'content': 0.10677619278430939, 'timestamp': '2025-09-10 02:53:07.778826', 'step': 14381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:07.832064', 'step': 14381, 'epoch': 2} {'type': 'loss', 'content': 0.10932382941246033, 'timestamp': '2025-09-10 02:53:07.834462', 'step': 14382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:07.887771', 'step': 14382, 'epoch': 2} {'type': 'loss', 'content': 0.12197455018758774, 'timestamp': '2025-09-10 02:53:07.890112', 'step': 14383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:07.943211', 'step': 14383, 'epoch': 2} {'type': 'loss', 'content': 0.09239935874938965, 'timestamp': '2025-09-10 02:53:07.949272', 'step': 14384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:08.002008', 'step': 14384, 'epoch': 2} {'type': 'loss', 'content': 0.08698802441358566, 'timestamp': '2025-09-10 02:53:08.004405', 'step': 14385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:08.058726', 'step': 14385, 'epoch': 2} {'type': 'loss', 'content': 0.08723696321249008, 'timestamp': '2025-09-10 02:53:08.061007', 'step': 14386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:08.115228', 'step': 14386, 'epoch': 2} {'type': 'loss', 'content': 0.1307862251996994, 'timestamp': '2025-09-10 02:53:08.117762', 'step': 14387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:08.170916', 'step': 14387, 'epoch': 2} {'type': 'loss', 'content': 0.13092008233070374, 'timestamp': '2025-09-10 02:53:08.177305', 'step': 14388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:08.230259', 'step': 14388, 'epoch': 2} {'type': 'loss', 'content': 0.15769994258880615, 'timestamp': '2025-09-10 02:53:08.232594', 'step': 14389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:08.285658', 'step': 14389, 'epoch': 2} {'type': 'loss', 'content': 0.1395101398229599, 'timestamp': '2025-09-10 02:53:08.287920', 'step': 14390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:08.341567', 'step': 14390, 'epoch': 2} {'type': 'loss', 'content': 0.08032723516225815, 'timestamp': '2025-09-10 02:53:08.343799', 'step': 14391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:08.397084', 'step': 14391, 'epoch': 2} {'type': 'loss', 'content': 0.04837925359606743, 'timestamp': '2025-09-10 02:53:08.403212', 'step': 14392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:08.457838', 'step': 14392, 'epoch': 2} {'type': 'loss', 'content': 0.04714687541127205, 'timestamp': '2025-09-10 02:53:08.460150', 'step': 14393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:08.513992', 'step': 14393, 'epoch': 2} {'type': 'loss', 'content': 0.10824581980705261, 'timestamp': '2025-09-10 02:53:08.516556', 'step': 14394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:08.570688', 'step': 14394, 'epoch': 2} {'type': 'loss', 'content': 0.05360814183950424, 'timestamp': '2025-09-10 02:53:08.573124', 'step': 14395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:08.629544', 'step': 14395, 'epoch': 2} {'type': 'loss', 'content': 0.09044785052537918, 'timestamp': '2025-09-10 02:53:08.636101', 'step': 14396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:08.689918', 'step': 14396, 'epoch': 2} {'type': 'loss', 'content': 0.07406529784202576, 'timestamp': '2025-09-10 02:53:08.692222', 'step': 14397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:08.745485', 'step': 14397, 'epoch': 2} {'type': 'loss', 'content': 0.06864971667528152, 'timestamp': '2025-09-10 02:53:08.747511', 'step': 14398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:08.801650', 'step': 14398, 'epoch': 2} {'type': 'loss', 'content': 0.11548762768507004, 'timestamp': '2025-09-10 02:53:08.804052', 'step': 14399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:08.858124', 'step': 14399, 'epoch': 2} {'type': 'loss', 'content': 0.11923734843730927, 'timestamp': '2025-09-10 02:53:08.864245', 'step': 14400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:08.918392', 'step': 14400, 'epoch': 2} {'type': 'loss', 'content': 0.15349729359149933, 'timestamp': '2025-09-10 02:53:08.920954', 'step': 14401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:08.975578', 'step': 14401, 'epoch': 2} {'type': 'loss', 'content': 0.09159792214632034, 'timestamp': '2025-09-10 02:53:08.978097', 'step': 14402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:09.034450', 'step': 14402, 'epoch': 2} {'type': 'loss', 'content': 0.12370890378952026, 'timestamp': '2025-09-10 02:53:09.036808', 'step': 14403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:09.090761', 'step': 14403, 'epoch': 2} {'type': 'loss', 'content': 0.10323479771614075, 'timestamp': '2025-09-10 02:53:09.096895', 'step': 14404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:09.150841', 'step': 14404, 'epoch': 2} {'type': 'loss', 'content': 0.11574717611074448, 'timestamp': '2025-09-10 02:53:09.153211', 'step': 14405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:09.207316', 'step': 14405, 'epoch': 2} {'type': 'loss', 'content': 0.10431976616382599, 'timestamp': '2025-09-10 02:53:09.210155', 'step': 14406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:09.266673', 'step': 14406, 'epoch': 2} {'type': 'loss', 'content': 0.08611632883548737, 'timestamp': '2025-09-10 02:53:09.269419', 'step': 14407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:09.324872', 'step': 14407, 'epoch': 2} {'type': 'loss', 'content': 0.07886408269405365, 'timestamp': '2025-09-10 02:53:09.331248', 'step': 14408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:09.384796', 'step': 14408, 'epoch': 2} {'type': 'loss', 'content': 0.1368541568517685, 'timestamp': '2025-09-10 02:53:09.387211', 'step': 14409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:53:09.440913', 'step': 14409, 'epoch': 2} {'type': 'loss', 'content': 0.23914848268032074, 'timestamp': '2025-09-10 02:53:09.443288', 'step': 14410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:09.496507', 'step': 14410, 'epoch': 2} {'type': 'loss', 'content': 0.08432676643133163, 'timestamp': '2025-09-10 02:53:09.498837', 'step': 14411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:09.552656', 'step': 14411, 'epoch': 2} {'type': 'loss', 'content': 0.07690563797950745, 'timestamp': '2025-09-10 02:53:09.558828', 'step': 14412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:09.614863', 'step': 14412, 'epoch': 2} {'type': 'loss', 'content': 0.131620392203331, 'timestamp': '2025-09-10 02:53:09.618653', 'step': 14413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:09.677304', 'step': 14413, 'epoch': 2} {'type': 'loss', 'content': 0.0827980786561966, 'timestamp': '2025-09-10 02:53:09.679762', 'step': 14414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:09.760461', 'step': 14414, 'epoch': 2} {'type': 'loss', 'content': 0.19575613737106323, 'timestamp': '2025-09-10 02:53:09.762997', 'step': 14415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:09.840570', 'step': 14415, 'epoch': 2} {'type': 'loss', 'content': 0.07404110580682755, 'timestamp': '2025-09-10 02:53:09.846805', 'step': 14416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:09.913541', 'step': 14416, 'epoch': 2} {'type': 'loss', 'content': 0.2794681489467621, 'timestamp': '2025-09-10 02:53:09.915610', 'step': 14417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:10.001397', 'step': 14417, 'epoch': 2} {'type': 'loss', 'content': 0.06913655251264572, 'timestamp': '2025-09-10 02:53:10.003491', 'step': 14418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:10.080973', 'step': 14418, 'epoch': 2} {'type': 'loss', 'content': 0.15715351700782776, 'timestamp': '2025-09-10 02:53:10.083312', 'step': 14419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:10.162998', 'step': 14419, 'epoch': 2} {'type': 'loss', 'content': 0.12260470539331436, 'timestamp': '2025-09-10 02:53:10.169382', 'step': 14420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:10.249536', 'step': 14420, 'epoch': 2} {'type': 'loss', 'content': 0.14420269429683685, 'timestamp': '2025-09-10 02:53:10.252010', 'step': 14421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:10.306808', 'step': 14421, 'epoch': 2} {'type': 'loss', 'content': 0.13126371800899506, 'timestamp': '2025-09-10 02:53:10.309081', 'step': 14422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:10.363780', 'step': 14422, 'epoch': 2} {'type': 'loss', 'content': 0.08924707770347595, 'timestamp': '2025-09-10 02:53:10.366078', 'step': 14423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:10.420321', 'step': 14423, 'epoch': 2} {'type': 'loss', 'content': 0.09602682292461395, 'timestamp': '2025-09-10 02:53:10.426580', 'step': 14424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:10.480829', 'step': 14424, 'epoch': 2} {'type': 'loss', 'content': 0.12999136745929718, 'timestamp': '2025-09-10 02:53:10.483286', 'step': 14425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:10.537084', 'step': 14425, 'epoch': 2} {'type': 'loss', 'content': 0.06379666179418564, 'timestamp': '2025-09-10 02:53:10.539459', 'step': 14426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:10.592955', 'step': 14426, 'epoch': 2} {'type': 'loss', 'content': 0.1114979013800621, 'timestamp': '2025-09-10 02:53:10.595297', 'step': 14427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:10.649747', 'step': 14427, 'epoch': 2} {'type': 'loss', 'content': 0.1593797355890274, 'timestamp': '2025-09-10 02:53:10.655928', 'step': 14428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:10.710632', 'step': 14428, 'epoch': 2} {'type': 'loss', 'content': 0.03102038986980915, 'timestamp': '2025-09-10 02:53:10.712884', 'step': 14429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:10.771176', 'step': 14429, 'epoch': 2} {'type': 'loss', 'content': 0.10862267017364502, 'timestamp': '2025-09-10 02:53:10.773781', 'step': 14430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:10.827773', 'step': 14430, 'epoch': 2} {'type': 'loss', 'content': 0.11664409935474396, 'timestamp': '2025-09-10 02:53:10.830210', 'step': 14431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:10.885377', 'step': 14431, 'epoch': 2} {'type': 'loss', 'content': 0.07988628000020981, 'timestamp': '2025-09-10 02:53:10.891532', 'step': 14432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:10.945816', 'step': 14432, 'epoch': 2} {'type': 'loss', 'content': 0.25262853503227234, 'timestamp': '2025-09-10 02:53:10.948096', 'step': 14433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:11.002148', 'step': 14433, 'epoch': 2} {'type': 'loss', 'content': 0.12035654485225677, 'timestamp': '2025-09-10 02:53:11.004432', 'step': 14434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:11.058253', 'step': 14434, 'epoch': 2} {'type': 'loss', 'content': 0.16118833422660828, 'timestamp': '2025-09-10 02:53:11.060124', 'step': 14435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:11.114673', 'step': 14435, 'epoch': 2} {'type': 'loss', 'content': 0.09560441225767136, 'timestamp': '2025-09-10 02:53:11.120769', 'step': 14436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:11.174989', 'step': 14436, 'epoch': 2} {'type': 'loss', 'content': 0.13160936534404755, 'timestamp': '2025-09-10 02:53:11.177350', 'step': 14437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:11.231457', 'step': 14437, 'epoch': 2} {'type': 'loss', 'content': 0.23276031017303467, 'timestamp': '2025-09-10 02:53:11.233754', 'step': 14438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:11.290740', 'step': 14438, 'epoch': 2} {'type': 'loss', 'content': 0.10163009911775589, 'timestamp': '2025-09-10 02:53:11.293207', 'step': 14439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:11.348980', 'step': 14439, 'epoch': 2} {'type': 'loss', 'content': 0.13779108226299286, 'timestamp': '2025-09-10 02:53:11.355591', 'step': 14440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:11.409983', 'step': 14440, 'epoch': 2} {'type': 'loss', 'content': 0.10964315384626389, 'timestamp': '2025-09-10 02:53:11.412336', 'step': 14441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:11.466713', 'step': 14441, 'epoch': 2} {'type': 'loss', 'content': 0.05513403192162514, 'timestamp': '2025-09-10 02:53:11.469071', 'step': 14442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:11.523448', 'step': 14442, 'epoch': 2} {'type': 'loss', 'content': 0.17136096954345703, 'timestamp': '2025-09-10 02:53:11.525728', 'step': 14443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:11.579991', 'step': 14443, 'epoch': 2} {'type': 'loss', 'content': 0.05335969477891922, 'timestamp': '2025-09-10 02:53:11.586402', 'step': 14444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:11.640096', 'step': 14444, 'epoch': 2} {'type': 'loss', 'content': 0.06420638412237167, 'timestamp': '2025-09-10 02:53:11.642453', 'step': 14445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:11.696067', 'step': 14445, 'epoch': 2} {'type': 'loss', 'content': 0.1762455552816391, 'timestamp': '2025-09-10 02:53:11.698462', 'step': 14446, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:53:24.602279', 'step': 14446, 'epoch': 2} {'type': 'pplx', 'content': 11729.38927278885, 'timestamp': '2025-09-10 02:53:24.608292', 'step': 14446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:24.666748', 'step': 14446, 'epoch': 2} {'type': 'loss', 'content': 0.16492630541324615, 'timestamp': '2025-09-10 02:53:24.668814', 'step': 14447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:24.725439', 'step': 14447, 'epoch': 2} {'type': 'loss', 'content': 0.07843048125505447, 'timestamp': '2025-09-10 02:53:24.732249', 'step': 14448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:24.788370', 'step': 14448, 'epoch': 2} {'type': 'loss', 'content': 0.06160544231534004, 'timestamp': '2025-09-10 02:53:24.790556', 'step': 14449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:24.849555', 'step': 14449, 'epoch': 2} {'type': 'loss', 'content': 0.1376228630542755, 'timestamp': '2025-09-10 02:53:24.851931', 'step': 14450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:24.909606', 'step': 14450, 'epoch': 2} {'type': 'loss', 'content': 0.17038686573505402, 'timestamp': '2025-09-10 02:53:24.913086', 'step': 14451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:24.967405', 'step': 14451, 'epoch': 2} {'type': 'loss', 'content': 0.11992330104112625, 'timestamp': '2025-09-10 02:53:24.974112', 'step': 14452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:25.037720', 'step': 14452, 'epoch': 2} {'type': 'loss', 'content': 0.1411868929862976, 'timestamp': '2025-09-10 02:53:25.041536', 'step': 14453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:25.098939', 'step': 14453, 'epoch': 2} {'type': 'loss', 'content': 0.08560355007648468, 'timestamp': '2025-09-10 02:53:25.105238', 'step': 14454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:25.162901', 'step': 14454, 'epoch': 2} {'type': 'loss', 'content': 0.2075091153383255, 'timestamp': '2025-09-10 02:53:25.165224', 'step': 14455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:25.222267', 'step': 14455, 'epoch': 2} {'type': 'loss', 'content': 0.0739806517958641, 'timestamp': '2025-09-10 02:53:25.228752', 'step': 14456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:25.284878', 'step': 14456, 'epoch': 2} {'type': 'loss', 'content': 0.10116086155176163, 'timestamp': '2025-09-10 02:53:25.287002', 'step': 14457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:25.342724', 'step': 14457, 'epoch': 2} {'type': 'loss', 'content': 0.16596947610378265, 'timestamp': '2025-09-10 02:53:25.345018', 'step': 14458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:25.401428', 'step': 14458, 'epoch': 2} {'type': 'loss', 'content': 0.08187434077262878, 'timestamp': '2025-09-10 02:53:25.404720', 'step': 14459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:25.461871', 'step': 14459, 'epoch': 2} {'type': 'loss', 'content': 0.21318086981773376, 'timestamp': '2025-09-10 02:53:25.473637', 'step': 14460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:25.533119', 'step': 14460, 'epoch': 2} {'type': 'loss', 'content': 0.04504900425672531, 'timestamp': '2025-09-10 02:53:25.536680', 'step': 14461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:25.603417', 'step': 14461, 'epoch': 2} {'type': 'loss', 'content': 0.11798939108848572, 'timestamp': '2025-09-10 02:53:25.606408', 'step': 14462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:25.671267', 'step': 14462, 'epoch': 2} {'type': 'loss', 'content': 0.18259401619434357, 'timestamp': '2025-09-10 02:53:25.684447', 'step': 14463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:25.750844', 'step': 14463, 'epoch': 2} {'type': 'loss', 'content': 0.12452609091997147, 'timestamp': '2025-09-10 02:53:25.757563', 'step': 14464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:25.813409', 'step': 14464, 'epoch': 2} {'type': 'loss', 'content': 0.12470444291830063, 'timestamp': '2025-09-10 02:53:25.820549', 'step': 14465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:25.880362', 'step': 14465, 'epoch': 2} {'type': 'loss', 'content': 0.08745244145393372, 'timestamp': '2025-09-10 02:53:25.882565', 'step': 14466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:25.938254', 'step': 14466, 'epoch': 2} {'type': 'loss', 'content': 0.1253739893436432, 'timestamp': '2025-09-10 02:53:25.940750', 'step': 14467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:26.015606', 'step': 14467, 'epoch': 2} {'type': 'loss', 'content': 0.08054060488939285, 'timestamp': '2025-09-10 02:53:26.023312', 'step': 14468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:26.085793', 'step': 14468, 'epoch': 2} {'type': 'loss', 'content': 0.15271160006523132, 'timestamp': '2025-09-10 02:53:26.088254', 'step': 14469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:26.144101', 'step': 14469, 'epoch': 2} {'type': 'loss', 'content': 0.0931171253323555, 'timestamp': '2025-09-10 02:53:26.147821', 'step': 14470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:26.218850', 'step': 14470, 'epoch': 2} {'type': 'loss', 'content': 0.09404557943344116, 'timestamp': '2025-09-10 02:53:26.222034', 'step': 14471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:26.277815', 'step': 14471, 'epoch': 2} {'type': 'loss', 'content': 0.1301497220993042, 'timestamp': '2025-09-10 02:53:26.284329', 'step': 14472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:26.338637', 'step': 14472, 'epoch': 2} {'type': 'loss', 'content': 0.164958655834198, 'timestamp': '2025-09-10 02:53:26.341190', 'step': 14473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:26.395679', 'step': 14473, 'epoch': 2} {'type': 'loss', 'content': 0.1854083091020584, 'timestamp': '2025-09-10 02:53:26.398151', 'step': 14474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:26.455511', 'step': 14474, 'epoch': 2} {'type': 'loss', 'content': 0.1082114428281784, 'timestamp': '2025-09-10 02:53:26.457920', 'step': 14475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:26.513318', 'step': 14475, 'epoch': 2} {'type': 'loss', 'content': 0.18944531679153442, 'timestamp': '2025-09-10 02:53:26.519895', 'step': 14476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:26.576623', 'step': 14476, 'epoch': 2} {'type': 'loss', 'content': 0.18246199190616608, 'timestamp': '2025-09-10 02:53:26.579010', 'step': 14477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:26.634715', 'step': 14477, 'epoch': 2} {'type': 'loss', 'content': 0.16542896628379822, 'timestamp': '2025-09-10 02:53:26.636775', 'step': 14478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:26.692494', 'step': 14478, 'epoch': 2} {'type': 'loss', 'content': 0.15794025361537933, 'timestamp': '2025-09-10 02:53:26.694740', 'step': 14479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:26.751493', 'step': 14479, 'epoch': 2} {'type': 'loss', 'content': 0.053227074444293976, 'timestamp': '2025-09-10 02:53:26.757801', 'step': 14480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:26.812369', 'step': 14480, 'epoch': 2} {'type': 'loss', 'content': 0.14168231189250946, 'timestamp': '2025-09-10 02:53:26.814728', 'step': 14481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:26.871200', 'step': 14481, 'epoch': 2} {'type': 'loss', 'content': 0.10533034056425095, 'timestamp': '2025-09-10 02:53:26.873634', 'step': 14482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:26.928819', 'step': 14482, 'epoch': 2} {'type': 'loss', 'content': 0.09660343825817108, 'timestamp': '2025-09-10 02:53:26.931391', 'step': 14483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:26.986576', 'step': 14483, 'epoch': 2} {'type': 'loss', 'content': 0.0741567388176918, 'timestamp': '2025-09-10 02:53:26.992876', 'step': 14484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:27.048123', 'step': 14484, 'epoch': 2} {'type': 'loss', 'content': 0.1395188719034195, 'timestamp': '2025-09-10 02:53:27.050451', 'step': 14485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:27.105421', 'step': 14485, 'epoch': 2} {'type': 'loss', 'content': 0.20432746410369873, 'timestamp': '2025-09-10 02:53:27.107673', 'step': 14486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:27.163902', 'step': 14486, 'epoch': 2} {'type': 'loss', 'content': 0.14048053324222565, 'timestamp': '2025-09-10 02:53:27.167395', 'step': 14487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:27.224114', 'step': 14487, 'epoch': 2} {'type': 'loss', 'content': 0.07979749888181686, 'timestamp': '2025-09-10 02:53:27.230522', 'step': 14488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:27.284880', 'step': 14488, 'epoch': 2} {'type': 'loss', 'content': 0.08488302677869797, 'timestamp': '2025-09-10 02:53:27.287177', 'step': 14489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:27.342951', 'step': 14489, 'epoch': 2} {'type': 'loss', 'content': 0.11425937712192535, 'timestamp': '2025-09-10 02:53:27.345303', 'step': 14490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:27.400798', 'step': 14490, 'epoch': 2} {'type': 'loss', 'content': 0.042428236454725266, 'timestamp': '2025-09-10 02:53:27.403081', 'step': 14491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:27.459637', 'step': 14491, 'epoch': 2} {'type': 'loss', 'content': 0.06144926697015762, 'timestamp': '2025-09-10 02:53:27.465970', 'step': 14492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:27.520543', 'step': 14492, 'epoch': 2} {'type': 'loss', 'content': 0.10662518441677094, 'timestamp': '2025-09-10 02:53:27.522881', 'step': 14493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:27.579916', 'step': 14493, 'epoch': 2} {'type': 'loss', 'content': 0.15411332249641418, 'timestamp': '2025-09-10 02:53:27.582190', 'step': 14494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:27.637986', 'step': 14494, 'epoch': 2} {'type': 'loss', 'content': 0.0935932919383049, 'timestamp': '2025-09-10 02:53:27.640247', 'step': 14495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:27.694277', 'step': 14495, 'epoch': 2} {'type': 'loss', 'content': 0.08126739412546158, 'timestamp': '2025-09-10 02:53:27.700802', 'step': 14496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:27.755979', 'step': 14496, 'epoch': 2} {'type': 'loss', 'content': 0.10876670479774475, 'timestamp': '2025-09-10 02:53:27.758336', 'step': 14497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:27.813067', 'step': 14497, 'epoch': 2} {'type': 'loss', 'content': 0.0866684839129448, 'timestamp': '2025-09-10 02:53:27.815301', 'step': 14498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:27.869712', 'step': 14498, 'epoch': 2} {'type': 'loss', 'content': 0.10675650835037231, 'timestamp': '2025-09-10 02:53:27.871745', 'step': 14499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:27.926729', 'step': 14499, 'epoch': 2} {'type': 'loss', 'content': 0.13193510472774506, 'timestamp': '2025-09-10 02:53:27.932994', 'step': 14500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 14500', 'timestamp': '2025-09-10 02:53:28.297574', 'step': 14500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:28.357930', 'step': 14500, 'epoch': 2} {'type': 'loss', 'content': 0.10436307638883591, 'timestamp': '2025-09-10 02:53:28.360100', 'step': 14501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:28.415534', 'step': 14501, 'epoch': 2} {'type': 'loss', 'content': 0.18923282623291016, 'timestamp': '2025-09-10 02:53:28.417763', 'step': 14502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:28.474154', 'step': 14502, 'epoch': 2} {'type': 'loss', 'content': 0.10139115899801254, 'timestamp': '2025-09-10 02:53:28.476470', 'step': 14503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:28.532073', 'step': 14503, 'epoch': 2} {'type': 'loss', 'content': 0.10519500076770782, 'timestamp': '2025-09-10 02:53:28.538515', 'step': 14504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:28.594006', 'step': 14504, 'epoch': 2} {'type': 'loss', 'content': 0.09632384777069092, 'timestamp': '2025-09-10 02:53:28.596207', 'step': 14505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:28.652304', 'step': 14505, 'epoch': 2} {'type': 'loss', 'content': 0.06519539654254913, 'timestamp': '2025-09-10 02:53:28.654633', 'step': 14506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:28.709762', 'step': 14506, 'epoch': 2} {'type': 'loss', 'content': 0.15870213508605957, 'timestamp': '2025-09-10 02:53:28.711979', 'step': 14507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:28.767379', 'step': 14507, 'epoch': 2} {'type': 'loss', 'content': 0.1278337836265564, 'timestamp': '2025-09-10 02:53:28.773552', 'step': 14508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:28.827604', 'step': 14508, 'epoch': 2} {'type': 'loss', 'content': 0.08538803458213806, 'timestamp': '2025-09-10 02:53:28.829788', 'step': 14509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:28.885033', 'step': 14509, 'epoch': 2} {'type': 'loss', 'content': 0.10056053847074509, 'timestamp': '2025-09-10 02:53:28.887360', 'step': 14510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:28.942086', 'step': 14510, 'epoch': 2} {'type': 'loss', 'content': 0.16311007738113403, 'timestamp': '2025-09-10 02:53:28.944586', 'step': 14511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:29.000126', 'step': 14511, 'epoch': 2} {'type': 'loss', 'content': 0.16897831857204437, 'timestamp': '2025-09-10 02:53:29.006402', 'step': 14512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:29.060834', 'step': 14512, 'epoch': 2} {'type': 'loss', 'content': 0.12872952222824097, 'timestamp': '2025-09-10 02:53:29.063090', 'step': 14513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:29.117514', 'step': 14513, 'epoch': 2} {'type': 'loss', 'content': 0.13188256323337555, 'timestamp': '2025-09-10 02:53:29.119712', 'step': 14514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:29.174489', 'step': 14514, 'epoch': 2} {'type': 'loss', 'content': 0.14536447823047638, 'timestamp': '2025-09-10 02:53:29.176811', 'step': 14515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:29.232174', 'step': 14515, 'epoch': 2} {'type': 'loss', 'content': 0.07207128405570984, 'timestamp': '2025-09-10 02:53:29.238590', 'step': 14516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:29.292690', 'step': 14516, 'epoch': 2} {'type': 'loss', 'content': 0.15793587267398834, 'timestamp': '2025-09-10 02:53:29.294948', 'step': 14517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:29.349162', 'step': 14517, 'epoch': 2} {'type': 'loss', 'content': 0.18830721080303192, 'timestamp': '2025-09-10 02:53:29.351528', 'step': 14518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:29.406474', 'step': 14518, 'epoch': 2} {'type': 'loss', 'content': 0.24463364481925964, 'timestamp': '2025-09-10 02:53:29.408652', 'step': 14519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:53:29.463433', 'step': 14519, 'epoch': 2} {'type': 'loss', 'content': 0.26311153173446655, 'timestamp': '2025-09-10 02:53:29.477123', 'step': 14520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:29.534896', 'step': 14520, 'epoch': 2} {'type': 'loss', 'content': 0.08754066377878189, 'timestamp': '2025-09-10 02:53:29.537844', 'step': 14521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:29.592308', 'step': 14521, 'epoch': 2} {'type': 'loss', 'content': 0.1997745782136917, 'timestamp': '2025-09-10 02:53:29.594250', 'step': 14522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:29.649172', 'step': 14522, 'epoch': 2} {'type': 'loss', 'content': 0.18597112596035004, 'timestamp': '2025-09-10 02:53:29.651476', 'step': 14523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:29.709610', 'step': 14523, 'epoch': 2} {'type': 'loss', 'content': 0.15794837474822998, 'timestamp': '2025-09-10 02:53:29.717008', 'step': 14524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:29.773477', 'step': 14524, 'epoch': 2} {'type': 'loss', 'content': 0.1225159764289856, 'timestamp': '2025-09-10 02:53:29.775723', 'step': 14525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:29.829973', 'step': 14525, 'epoch': 2} {'type': 'loss', 'content': 0.15750309824943542, 'timestamp': '2025-09-10 02:53:29.831966', 'step': 14526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:29.891364', 'step': 14526, 'epoch': 2} {'type': 'loss', 'content': 0.16631627082824707, 'timestamp': '2025-09-10 02:53:29.893345', 'step': 14527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:29.948341', 'step': 14527, 'epoch': 2} {'type': 'loss', 'content': 0.04747013375163078, 'timestamp': '2025-09-10 02:53:29.958695', 'step': 14528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:30.018249', 'step': 14528, 'epoch': 2} {'type': 'loss', 'content': 0.10021815448999405, 'timestamp': '2025-09-10 02:53:30.020598', 'step': 14529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:30.088263', 'step': 14529, 'epoch': 2} {'type': 'loss', 'content': 0.08814087510108948, 'timestamp': '2025-09-10 02:53:30.090538', 'step': 14530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:30.147437', 'step': 14530, 'epoch': 2} {'type': 'loss', 'content': 0.13068260252475739, 'timestamp': '2025-09-10 02:53:30.149648', 'step': 14531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:30.207022', 'step': 14531, 'epoch': 2} {'type': 'loss', 'content': 0.12206703424453735, 'timestamp': '2025-09-10 02:53:30.216116', 'step': 14532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:30.278647', 'step': 14532, 'epoch': 2} {'type': 'loss', 'content': 0.06692897528409958, 'timestamp': '2025-09-10 02:53:30.280872', 'step': 14533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:30.337773', 'step': 14533, 'epoch': 2} {'type': 'loss', 'content': 0.14068225026130676, 'timestamp': '2025-09-10 02:53:30.339980', 'step': 14534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:30.395763', 'step': 14534, 'epoch': 2} {'type': 'loss', 'content': 0.0808192789554596, 'timestamp': '2025-09-10 02:53:30.397862', 'step': 14535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:30.454081', 'step': 14535, 'epoch': 2} {'type': 'loss', 'content': 0.0897526741027832, 'timestamp': '2025-09-10 02:53:30.460675', 'step': 14536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:30.519613', 'step': 14536, 'epoch': 2} {'type': 'loss', 'content': 0.06717191636562347, 'timestamp': '2025-09-10 02:53:30.521871', 'step': 14537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:53:30.577665', 'step': 14537, 'epoch': 2} {'type': 'loss', 'content': 0.1477959305047989, 'timestamp': '2025-09-10 02:53:30.579848', 'step': 14538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:30.636223', 'step': 14538, 'epoch': 2} {'type': 'loss', 'content': 0.1291283816099167, 'timestamp': '2025-09-10 02:53:30.638583', 'step': 14539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:30.693126', 'step': 14539, 'epoch': 2} {'type': 'loss', 'content': 0.08716016262769699, 'timestamp': '2025-09-10 02:53:30.699786', 'step': 14540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:30.754059', 'step': 14540, 'epoch': 2} {'type': 'loss', 'content': 0.14129352569580078, 'timestamp': '2025-09-10 02:53:30.757227', 'step': 14541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:30.812243', 'step': 14541, 'epoch': 2} {'type': 'loss', 'content': 0.12206341326236725, 'timestamp': '2025-09-10 02:53:30.814245', 'step': 14542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:30.869084', 'step': 14542, 'epoch': 2} {'type': 'loss', 'content': 0.08093704283237457, 'timestamp': '2025-09-10 02:53:30.871076', 'step': 14543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:30.925513', 'step': 14543, 'epoch': 2} {'type': 'loss', 'content': 0.12042523175477982, 'timestamp': '2025-09-10 02:53:30.931912', 'step': 14544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:30.986511', 'step': 14544, 'epoch': 2} {'type': 'loss', 'content': 0.15707850456237793, 'timestamp': '2025-09-10 02:53:30.988814', 'step': 14545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:31.043750', 'step': 14545, 'epoch': 2} {'type': 'loss', 'content': 0.12899558246135712, 'timestamp': '2025-09-10 02:53:31.045903', 'step': 14546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:31.100874', 'step': 14546, 'epoch': 2} {'type': 'loss', 'content': 0.14784252643585205, 'timestamp': '2025-09-10 02:53:31.102927', 'step': 14547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:31.157944', 'step': 14547, 'epoch': 2} {'type': 'loss', 'content': 0.11458941549062729, 'timestamp': '2025-09-10 02:53:31.164054', 'step': 14548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:31.217519', 'step': 14548, 'epoch': 2} {'type': 'loss', 'content': 0.08532220125198364, 'timestamp': '2025-09-10 02:53:31.219473', 'step': 14549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:31.274146', 'step': 14549, 'epoch': 2} {'type': 'loss', 'content': 0.017397385090589523, 'timestamp': '2025-09-10 02:53:31.276244', 'step': 14550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:31.336941', 'step': 14550, 'epoch': 2} {'type': 'loss', 'content': 0.09949694573879242, 'timestamp': '2025-09-10 02:53:31.338925', 'step': 14551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:31.394093', 'step': 14551, 'epoch': 2} {'type': 'loss', 'content': 0.06340747326612473, 'timestamp': '2025-09-10 02:53:31.400150', 'step': 14552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:53:31.454321', 'step': 14552, 'epoch': 2} {'type': 'loss', 'content': 0.06541716307401657, 'timestamp': '2025-09-10 02:53:31.456339', 'step': 14553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:31.510874', 'step': 14553, 'epoch': 2} {'type': 'loss', 'content': 0.060007985681295395, 'timestamp': '2025-09-10 02:53:31.512964', 'step': 14554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:31.567568', 'step': 14554, 'epoch': 2} {'type': 'loss', 'content': 0.18424418568611145, 'timestamp': '2025-09-10 02:53:31.569682', 'step': 14555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:31.624666', 'step': 14555, 'epoch': 2} {'type': 'loss', 'content': 0.11062683165073395, 'timestamp': '2025-09-10 02:53:31.631006', 'step': 14556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:31.691417', 'step': 14556, 'epoch': 2} {'type': 'loss', 'content': 0.12272260338068008, 'timestamp': '2025-09-10 02:53:31.693713', 'step': 14557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:31.749641', 'step': 14557, 'epoch': 2} {'type': 'loss', 'content': 0.1439312845468521, 'timestamp': '2025-09-10 02:53:31.751865', 'step': 14558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:31.809106', 'step': 14558, 'epoch': 2} {'type': 'loss', 'content': 0.08754248917102814, 'timestamp': '2025-09-10 02:53:31.811484', 'step': 14559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:31.871007', 'step': 14559, 'epoch': 2} {'type': 'loss', 'content': 0.13475021719932556, 'timestamp': '2025-09-10 02:53:31.877959', 'step': 14560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:31.936906', 'step': 14560, 'epoch': 2} {'type': 'loss', 'content': 0.12988919019699097, 'timestamp': '2025-09-10 02:53:31.939185', 'step': 14561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:31.996477', 'step': 14561, 'epoch': 2} {'type': 'loss', 'content': 0.1788720041513443, 'timestamp': '2025-09-10 02:53:31.998447', 'step': 14562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:32.056946', 'step': 14562, 'epoch': 2} {'type': 'loss', 'content': 0.10016525536775589, 'timestamp': '2025-09-10 02:53:32.059401', 'step': 14563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:53:32.114465', 'step': 14563, 'epoch': 2} {'type': 'loss', 'content': 0.17536291480064392, 'timestamp': '2025-09-10 02:53:32.120795', 'step': 14564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:32.174461', 'step': 14564, 'epoch': 2} {'type': 'loss', 'content': 0.061302389949560165, 'timestamp': '2025-09-10 02:53:32.176444', 'step': 14565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:32.230709', 'step': 14565, 'epoch': 2} {'type': 'loss', 'content': 0.11904603242874146, 'timestamp': '2025-09-10 02:53:32.232937', 'step': 14566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:32.288724', 'step': 14566, 'epoch': 2} {'type': 'loss', 'content': 0.06998898088932037, 'timestamp': '2025-09-10 02:53:32.291175', 'step': 14567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:32.346035', 'step': 14567, 'epoch': 2} {'type': 'loss', 'content': 0.10160034894943237, 'timestamp': '2025-09-10 02:53:32.352651', 'step': 14568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:32.407513', 'step': 14568, 'epoch': 2} {'type': 'loss', 'content': 0.0631071999669075, 'timestamp': '2025-09-10 02:53:32.409775', 'step': 14569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:32.466034', 'step': 14569, 'epoch': 2} {'type': 'loss', 'content': 0.1245269626379013, 'timestamp': '2025-09-10 02:53:32.468209', 'step': 14570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:32.522866', 'step': 14570, 'epoch': 2} {'type': 'loss', 'content': 0.11027755588293076, 'timestamp': '2025-09-10 02:53:32.525128', 'step': 14571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:32.580270', 'step': 14571, 'epoch': 2} {'type': 'loss', 'content': 0.18713729083538055, 'timestamp': '2025-09-10 02:53:32.586580', 'step': 14572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:32.640597', 'step': 14572, 'epoch': 2} {'type': 'loss', 'content': 0.1021200567483902, 'timestamp': '2025-09-10 02:53:32.642885', 'step': 14573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:32.697294', 'step': 14573, 'epoch': 2} {'type': 'loss', 'content': 0.05013776570558548, 'timestamp': '2025-09-10 02:53:32.699489', 'step': 14574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:32.753651', 'step': 14574, 'epoch': 2} {'type': 'loss', 'content': 0.12477404624223709, 'timestamp': '2025-09-10 02:53:32.755829', 'step': 14575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:32.809331', 'step': 14575, 'epoch': 2} {'type': 'loss', 'content': 0.057885460555553436, 'timestamp': '2025-09-10 02:53:32.815144', 'step': 14576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:32.868320', 'step': 14576, 'epoch': 2} {'type': 'loss', 'content': 0.13082124292850494, 'timestamp': '2025-09-10 02:53:32.870176', 'step': 14577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:32.922851', 'step': 14577, 'epoch': 2} {'type': 'loss', 'content': 0.2047160118818283, 'timestamp': '2025-09-10 02:53:32.924968', 'step': 14578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:32.979094', 'step': 14578, 'epoch': 2} {'type': 'loss', 'content': 0.06653791666030884, 'timestamp': '2025-09-10 02:53:32.981362', 'step': 14579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:33.035445', 'step': 14579, 'epoch': 2} {'type': 'loss', 'content': 0.10442493855953217, 'timestamp': '2025-09-10 02:53:33.041457', 'step': 14580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:33.094816', 'step': 14580, 'epoch': 2} {'type': 'loss', 'content': 0.13824282586574554, 'timestamp': '2025-09-10 02:53:33.097155', 'step': 14581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:33.152777', 'step': 14581, 'epoch': 2} {'type': 'loss', 'content': 0.08097274601459503, 'timestamp': '2025-09-10 02:53:33.155167', 'step': 14582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:33.210079', 'step': 14582, 'epoch': 2} {'type': 'loss', 'content': 0.08959934860467911, 'timestamp': '2025-09-10 02:53:33.212437', 'step': 14583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:33.267328', 'step': 14583, 'epoch': 2} {'type': 'loss', 'content': 0.08814627677202225, 'timestamp': '2025-09-10 02:53:33.273085', 'step': 14584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:33.326725', 'step': 14584, 'epoch': 2} {'type': 'loss', 'content': 0.0759742334485054, 'timestamp': '2025-09-10 02:53:33.328980', 'step': 14585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:33.382709', 'step': 14585, 'epoch': 2} {'type': 'loss', 'content': 0.0794636681675911, 'timestamp': '2025-09-10 02:53:33.384842', 'step': 14586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:33.440637', 'step': 14586, 'epoch': 2} {'type': 'loss', 'content': 0.08272125571966171, 'timestamp': '2025-09-10 02:53:33.442989', 'step': 14587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:33.498500', 'step': 14587, 'epoch': 2} {'type': 'loss', 'content': 0.1971357762813568, 'timestamp': '2025-09-10 02:53:33.504671', 'step': 14588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:33.558697', 'step': 14588, 'epoch': 2} {'type': 'loss', 'content': 0.105123370885849, 'timestamp': '2025-09-10 02:53:33.560869', 'step': 14589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:33.615639', 'step': 14589, 'epoch': 2} {'type': 'loss', 'content': 0.1798660159111023, 'timestamp': '2025-09-10 02:53:33.617735', 'step': 14590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:33.672136', 'step': 14590, 'epoch': 2} {'type': 'loss', 'content': 0.0910537913441658, 'timestamp': '2025-09-10 02:53:33.674179', 'step': 14591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:33.728818', 'step': 14591, 'epoch': 2} {'type': 'loss', 'content': 0.11077810078859329, 'timestamp': '2025-09-10 02:53:33.735050', 'step': 14592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:33.788904', 'step': 14592, 'epoch': 2} {'type': 'loss', 'content': 0.10169950872659683, 'timestamp': '2025-09-10 02:53:33.791199', 'step': 14593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:33.845482', 'step': 14593, 'epoch': 2} {'type': 'loss', 'content': 0.06089625507593155, 'timestamp': '2025-09-10 02:53:33.847699', 'step': 14594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:33.901644', 'step': 14594, 'epoch': 2} {'type': 'loss', 'content': 0.12984471023082733, 'timestamp': '2025-09-10 02:53:33.903869', 'step': 14595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:33.957276', 'step': 14595, 'epoch': 2} {'type': 'loss', 'content': 0.10167602449655533, 'timestamp': '2025-09-10 02:53:33.962924', 'step': 14596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:34.016359', 'step': 14596, 'epoch': 2} {'type': 'loss', 'content': 0.05472392216324806, 'timestamp': '2025-09-10 02:53:34.018740', 'step': 14597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:34.073725', 'step': 14597, 'epoch': 2} {'type': 'loss', 'content': 0.06880873441696167, 'timestamp': '2025-09-10 02:53:34.076055', 'step': 14598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:53:34.130494', 'step': 14598, 'epoch': 2} {'type': 'loss', 'content': 0.05221740901470184, 'timestamp': '2025-09-10 02:53:34.132761', 'step': 14599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:34.188091', 'step': 14599, 'epoch': 2} {'type': 'loss', 'content': 0.05999195948243141, 'timestamp': '2025-09-10 02:53:34.194465', 'step': 14600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:34.248024', 'step': 14600, 'epoch': 2} {'type': 'loss', 'content': 0.08381737023591995, 'timestamp': '2025-09-10 02:53:34.250229', 'step': 14601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:34.303917', 'step': 14601, 'epoch': 2} {'type': 'loss', 'content': 0.06541405618190765, 'timestamp': '2025-09-10 02:53:34.306069', 'step': 14602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:34.359558', 'step': 14602, 'epoch': 2} {'type': 'loss', 'content': 0.14079169929027557, 'timestamp': '2025-09-10 02:53:34.361871', 'step': 14603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:34.415260', 'step': 14603, 'epoch': 2} {'type': 'loss', 'content': 0.15572220087051392, 'timestamp': '2025-09-10 02:53:34.421447', 'step': 14604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:34.474445', 'step': 14604, 'epoch': 2} {'type': 'loss', 'content': 0.056196942925453186, 'timestamp': '2025-09-10 02:53:34.476359', 'step': 14605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:34.529004', 'step': 14605, 'epoch': 2} {'type': 'loss', 'content': 0.14703282713890076, 'timestamp': '2025-09-10 02:53:34.530934', 'step': 14606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:34.584446', 'step': 14606, 'epoch': 2} {'type': 'loss', 'content': 0.050477705895900726, 'timestamp': '2025-09-10 02:53:34.586377', 'step': 14607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:34.639258', 'step': 14607, 'epoch': 2} {'type': 'loss', 'content': 0.09260270744562149, 'timestamp': '2025-09-10 02:53:34.645080', 'step': 14608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:34.697196', 'step': 14608, 'epoch': 2} {'type': 'loss', 'content': 0.13682252168655396, 'timestamp': '2025-09-10 02:53:34.699161', 'step': 14609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:34.751803', 'step': 14609, 'epoch': 2} {'type': 'loss', 'content': 0.11248082667589188, 'timestamp': '2025-09-10 02:53:34.753897', 'step': 14610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:34.807119', 'step': 14610, 'epoch': 2} {'type': 'loss', 'content': 0.1011388972401619, 'timestamp': '2025-09-10 02:53:34.809220', 'step': 14611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:34.862510', 'step': 14611, 'epoch': 2} {'type': 'loss', 'content': 0.1638161689043045, 'timestamp': '2025-09-10 02:53:34.868437', 'step': 14612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:34.921573', 'step': 14612, 'epoch': 2} {'type': 'loss', 'content': 0.0739968940615654, 'timestamp': '2025-09-10 02:53:34.923780', 'step': 14613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:34.976998', 'step': 14613, 'epoch': 2} {'type': 'loss', 'content': 0.09437794983386993, 'timestamp': '2025-09-10 02:53:34.979192', 'step': 14614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:35.032294', 'step': 14614, 'epoch': 2} {'type': 'loss', 'content': 0.09720474481582642, 'timestamp': '2025-09-10 02:53:35.034487', 'step': 14615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:35.088767', 'step': 14615, 'epoch': 2} {'type': 'loss', 'content': 0.10563651472330093, 'timestamp': '2025-09-10 02:53:35.094802', 'step': 14616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:35.148105', 'step': 14616, 'epoch': 2} {'type': 'loss', 'content': 0.09408789128065109, 'timestamp': '2025-09-10 02:53:35.150297', 'step': 14617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:35.205352', 'step': 14617, 'epoch': 2} {'type': 'loss', 'content': 0.14110910892486572, 'timestamp': '2025-09-10 02:53:35.207499', 'step': 14618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:53:35.262304', 'step': 14618, 'epoch': 2} {'type': 'loss', 'content': 0.1854841411113739, 'timestamp': '2025-09-10 02:53:35.264480', 'step': 14619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:35.319509', 'step': 14619, 'epoch': 2} {'type': 'loss', 'content': 0.0847332626581192, 'timestamp': '2025-09-10 02:53:35.325496', 'step': 14620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:35.380714', 'step': 14620, 'epoch': 2} {'type': 'loss', 'content': 0.2076897770166397, 'timestamp': '2025-09-10 02:53:35.382836', 'step': 14621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:35.435842', 'step': 14621, 'epoch': 2} {'type': 'loss', 'content': 0.1257307529449463, 'timestamp': '2025-09-10 02:53:35.438021', 'step': 14622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:35.491865', 'step': 14622, 'epoch': 2} {'type': 'loss', 'content': 0.13170188665390015, 'timestamp': '2025-09-10 02:53:35.494122', 'step': 14623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:35.546769', 'step': 14623, 'epoch': 2} {'type': 'loss', 'content': 0.11372905224561691, 'timestamp': '2025-09-10 02:53:35.552739', 'step': 14624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:35.606788', 'step': 14624, 'epoch': 2} {'type': 'loss', 'content': 0.09914439171552658, 'timestamp': '2025-09-10 02:53:35.609082', 'step': 14625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:35.664269', 'step': 14625, 'epoch': 2} {'type': 'loss', 'content': 0.06638966500759125, 'timestamp': '2025-09-10 02:53:35.666632', 'step': 14626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:35.721697', 'step': 14626, 'epoch': 2} {'type': 'loss', 'content': 0.22849997878074646, 'timestamp': '2025-09-10 02:53:35.723726', 'step': 14627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:35.778543', 'step': 14627, 'epoch': 2} {'type': 'loss', 'content': 0.07627242803573608, 'timestamp': '2025-09-10 02:53:35.784896', 'step': 14628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:35.838163', 'step': 14628, 'epoch': 2} {'type': 'loss', 'content': 0.14465481042861938, 'timestamp': '2025-09-10 02:53:35.840087', 'step': 14629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:53:35.893178', 'step': 14629, 'epoch': 2} {'type': 'loss', 'content': 0.07184048742055893, 'timestamp': '2025-09-10 02:53:35.895159', 'step': 14630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:35.948322', 'step': 14630, 'epoch': 2} {'type': 'loss', 'content': 0.10575103759765625, 'timestamp': '2025-09-10 02:53:35.950361', 'step': 14631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:36.002888', 'step': 14631, 'epoch': 2} {'type': 'loss', 'content': 0.13524217903614044, 'timestamp': '2025-09-10 02:53:36.008701', 'step': 14632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:36.061114', 'step': 14632, 'epoch': 2} {'type': 'loss', 'content': 0.15614373981952667, 'timestamp': '2025-09-10 02:53:36.063062', 'step': 14633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:36.115995', 'step': 14633, 'epoch': 2} {'type': 'loss', 'content': 0.09719538688659668, 'timestamp': '2025-09-10 02:53:36.117938', 'step': 14634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:36.171900', 'step': 14634, 'epoch': 2} {'type': 'loss', 'content': 0.09044712781906128, 'timestamp': '2025-09-10 02:53:36.174046', 'step': 14635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:36.227986', 'step': 14635, 'epoch': 2} {'type': 'loss', 'content': 0.12307003140449524, 'timestamp': '2025-09-10 02:53:36.233766', 'step': 14636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:36.285596', 'step': 14636, 'epoch': 2} {'type': 'loss', 'content': 0.14618076384067535, 'timestamp': '2025-09-10 02:53:36.287525', 'step': 14637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:36.341346', 'step': 14637, 'epoch': 2} {'type': 'loss', 'content': 0.0948350802063942, 'timestamp': '2025-09-10 02:53:36.343628', 'step': 14638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:36.400703', 'step': 14638, 'epoch': 2} {'type': 'loss', 'content': 0.06458792090415955, 'timestamp': '2025-09-10 02:53:36.404910', 'step': 14639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:36.464858', 'step': 14639, 'epoch': 2} {'type': 'loss', 'content': 0.11656538397073746, 'timestamp': '2025-09-10 02:53:36.470840', 'step': 14640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:36.523913', 'step': 14640, 'epoch': 2} {'type': 'loss', 'content': 0.17186424136161804, 'timestamp': '2025-09-10 02:53:36.525845', 'step': 14641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:36.579028', 'step': 14641, 'epoch': 2} {'type': 'loss', 'content': 0.11220435053110123, 'timestamp': '2025-09-10 02:53:36.581028', 'step': 14642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:36.638417', 'step': 14642, 'epoch': 2} {'type': 'loss', 'content': 0.09065048396587372, 'timestamp': '2025-09-10 02:53:36.640443', 'step': 14643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:36.703672', 'step': 14643, 'epoch': 2} {'type': 'loss', 'content': 0.16980749368667603, 'timestamp': '2025-09-10 02:53:36.711283', 'step': 14644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:36.764671', 'step': 14644, 'epoch': 2} {'type': 'loss', 'content': 0.14346449077129364, 'timestamp': '2025-09-10 02:53:36.768858', 'step': 14645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:36.826274', 'step': 14645, 'epoch': 2} {'type': 'loss', 'content': 0.08093318343162537, 'timestamp': '2025-09-10 02:53:36.828270', 'step': 14646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:36.881046', 'step': 14646, 'epoch': 2} {'type': 'loss', 'content': 0.15363715589046478, 'timestamp': '2025-09-10 02:53:36.884904', 'step': 14647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:36.943073', 'step': 14647, 'epoch': 2} {'type': 'loss', 'content': 0.12027007341384888, 'timestamp': '2025-09-10 02:53:36.948783', 'step': 14648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:37.013173', 'step': 14648, 'epoch': 2} {'type': 'loss', 'content': 0.19836698472499847, 'timestamp': '2025-09-10 02:53:37.015175', 'step': 14649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:37.069503', 'step': 14649, 'epoch': 2} {'type': 'loss', 'content': 0.08140835911035538, 'timestamp': '2025-09-10 02:53:37.071444', 'step': 14650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:37.126219', 'step': 14650, 'epoch': 2} {'type': 'loss', 'content': 0.15694093704223633, 'timestamp': '2025-09-10 02:53:37.128139', 'step': 14651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:37.184757', 'step': 14651, 'epoch': 2} {'type': 'loss', 'content': 0.1731836497783661, 'timestamp': '2025-09-10 02:53:37.190650', 'step': 14652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:37.243063', 'step': 14652, 'epoch': 2} {'type': 'loss', 'content': 0.163223534822464, 'timestamp': '2025-09-10 02:53:37.245106', 'step': 14653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:37.297982', 'step': 14653, 'epoch': 2} {'type': 'loss', 'content': 0.14042827486991882, 'timestamp': '2025-09-10 02:53:37.303671', 'step': 14654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:37.368804', 'step': 14654, 'epoch': 2} {'type': 'loss', 'content': 0.16653424501419067, 'timestamp': '2025-09-10 02:53:37.374832', 'step': 14655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:53:37.433065', 'step': 14655, 'epoch': 2} {'type': 'loss', 'content': 0.1491674780845642, 'timestamp': '2025-09-10 02:53:37.453353', 'step': 14656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:37.512383', 'step': 14656, 'epoch': 2} {'type': 'loss', 'content': 0.14220130443572998, 'timestamp': '2025-09-10 02:53:37.522441', 'step': 14657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:37.587253', 'step': 14657, 'epoch': 2} {'type': 'loss', 'content': 0.05693173035979271, 'timestamp': '2025-09-10 02:53:37.592097', 'step': 14658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:37.647337', 'step': 14658, 'epoch': 2} {'type': 'loss', 'content': 0.10666602849960327, 'timestamp': '2025-09-10 02:53:37.649407', 'step': 14659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:37.702598', 'step': 14659, 'epoch': 2} {'type': 'loss', 'content': 0.08701758831739426, 'timestamp': '2025-09-10 02:53:37.708341', 'step': 14660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:37.771690', 'step': 14660, 'epoch': 2} {'type': 'loss', 'content': 0.056258637458086014, 'timestamp': '2025-09-10 02:53:37.773678', 'step': 14661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:37.831724', 'step': 14661, 'epoch': 2} {'type': 'loss', 'content': 0.1046786829829216, 'timestamp': '2025-09-10 02:53:37.833682', 'step': 14662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:37.889790', 'step': 14662, 'epoch': 2} {'type': 'loss', 'content': 0.21470727026462555, 'timestamp': '2025-09-10 02:53:37.891869', 'step': 14663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:37.946625', 'step': 14663, 'epoch': 2} {'type': 'loss', 'content': 0.16950932145118713, 'timestamp': '2025-09-10 02:53:37.952238', 'step': 14664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:38.004730', 'step': 14664, 'epoch': 2} {'type': 'loss', 'content': 0.14704649150371552, 'timestamp': '2025-09-10 02:53:38.006704', 'step': 14665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:38.062334', 'step': 14665, 'epoch': 2} {'type': 'loss', 'content': 0.09746111929416656, 'timestamp': '2025-09-10 02:53:38.064485', 'step': 14666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:38.120568', 'step': 14666, 'epoch': 2} {'type': 'loss', 'content': 0.18051449954509735, 'timestamp': '2025-09-10 02:53:38.123768', 'step': 14667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:38.190601', 'step': 14667, 'epoch': 2} {'type': 'loss', 'content': 0.15443331003189087, 'timestamp': '2025-09-10 02:53:38.196305', 'step': 14668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:38.263319', 'step': 14668, 'epoch': 2} {'type': 'loss', 'content': 0.09373566508293152, 'timestamp': '2025-09-10 02:53:38.266751', 'step': 14669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:38.332694', 'step': 14669, 'epoch': 2} {'type': 'loss', 'content': 0.1118294969201088, 'timestamp': '2025-09-10 02:53:38.335047', 'step': 14670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:38.419116', 'step': 14670, 'epoch': 2} {'type': 'loss', 'content': 0.07073495537042618, 'timestamp': '2025-09-10 02:53:38.423115', 'step': 14671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:38.481080', 'step': 14671, 'epoch': 2} {'type': 'loss', 'content': 0.0856843814253807, 'timestamp': '2025-09-10 02:53:38.486825', 'step': 14672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:38.570208', 'step': 14672, 'epoch': 2} {'type': 'loss', 'content': 0.09791862219572067, 'timestamp': '2025-09-10 02:53:38.574574', 'step': 14673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:38.635364', 'step': 14673, 'epoch': 2} {'type': 'loss', 'content': 0.12349013984203339, 'timestamp': '2025-09-10 02:53:38.637508', 'step': 14674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:38.693532', 'step': 14674, 'epoch': 2} {'type': 'loss', 'content': 0.0773535817861557, 'timestamp': '2025-09-10 02:53:38.697573', 'step': 14675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:38.754939', 'step': 14675, 'epoch': 2} {'type': 'loss', 'content': 0.08966846764087677, 'timestamp': '2025-09-10 02:53:38.760735', 'step': 14676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:38.816798', 'step': 14676, 'epoch': 2} {'type': 'loss', 'content': 0.12784157693386078, 'timestamp': '2025-09-10 02:53:38.818800', 'step': 14677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:38.871757', 'step': 14677, 'epoch': 2} {'type': 'loss', 'content': 0.06822272390127182, 'timestamp': '2025-09-10 02:53:38.873785', 'step': 14678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:38.926933', 'step': 14678, 'epoch': 2} {'type': 'loss', 'content': 0.05617132410407066, 'timestamp': '2025-09-10 02:53:38.929048', 'step': 14679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:38.982436', 'step': 14679, 'epoch': 2} {'type': 'loss', 'content': 0.11010818183422089, 'timestamp': '2025-09-10 02:53:38.988103', 'step': 14680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:39.042181', 'step': 14680, 'epoch': 2} {'type': 'loss', 'content': 0.09786385297775269, 'timestamp': '2025-09-10 02:53:39.044194', 'step': 14681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:39.098283', 'step': 14681, 'epoch': 2} {'type': 'loss', 'content': 0.1046018898487091, 'timestamp': '2025-09-10 02:53:39.100222', 'step': 14682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:39.154353', 'step': 14682, 'epoch': 2} {'type': 'loss', 'content': 0.12664183974266052, 'timestamp': '2025-09-10 02:53:39.157034', 'step': 14683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:53:39.212254', 'step': 14683, 'epoch': 2} {'type': 'loss', 'content': 0.07153812050819397, 'timestamp': '2025-09-10 02:53:39.218925', 'step': 14684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:39.279276', 'step': 14684, 'epoch': 2} {'type': 'loss', 'content': 0.11702751368284225, 'timestamp': '2025-09-10 02:53:39.281237', 'step': 14685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:39.335617', 'step': 14685, 'epoch': 2} {'type': 'loss', 'content': 0.15410226583480835, 'timestamp': '2025-09-10 02:53:39.337625', 'step': 14686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:39.392618', 'step': 14686, 'epoch': 2} {'type': 'loss', 'content': 0.0679270401597023, 'timestamp': '2025-09-10 02:53:39.394624', 'step': 14687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:39.450482', 'step': 14687, 'epoch': 2} {'type': 'loss', 'content': 0.08285602927207947, 'timestamp': '2025-09-10 02:53:39.456196', 'step': 14688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:39.511292', 'step': 14688, 'epoch': 2} {'type': 'loss', 'content': 0.07238034904003143, 'timestamp': '2025-09-10 02:53:39.513286', 'step': 14689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:39.566620', 'step': 14689, 'epoch': 2} {'type': 'loss', 'content': 0.10189300030469894, 'timestamp': '2025-09-10 02:53:39.568400', 'step': 14690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:39.622595', 'step': 14690, 'epoch': 2} {'type': 'loss', 'content': 0.11287607252597809, 'timestamp': '2025-09-10 02:53:39.624766', 'step': 14691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:39.680452', 'step': 14691, 'epoch': 2} {'type': 'loss', 'content': 0.05132992938160896, 'timestamp': '2025-09-10 02:53:39.687251', 'step': 14692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:39.741769', 'step': 14692, 'epoch': 2} {'type': 'loss', 'content': 0.08860483765602112, 'timestamp': '2025-09-10 02:53:39.743808', 'step': 14693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:39.803659', 'step': 14693, 'epoch': 2} {'type': 'loss', 'content': 0.05350480601191521, 'timestamp': '2025-09-10 02:53:39.805732', 'step': 14694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:39.875895', 'step': 14694, 'epoch': 2} {'type': 'loss', 'content': 0.048282865434885025, 'timestamp': '2025-09-10 02:53:39.877778', 'step': 14695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:39.977950', 'step': 14695, 'epoch': 2} {'type': 'loss', 'content': 0.07800725847482681, 'timestamp': '2025-09-10 02:53:39.988986', 'step': 14696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:40.063125', 'step': 14696, 'epoch': 2} {'type': 'loss', 'content': 0.09143302589654922, 'timestamp': '2025-09-10 02:53:40.068466', 'step': 14697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:40.127933', 'step': 14697, 'epoch': 2} {'type': 'loss', 'content': 0.04970750957727432, 'timestamp': '2025-09-10 02:53:40.129952', 'step': 14698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:40.184548', 'step': 14698, 'epoch': 2} {'type': 'loss', 'content': 0.1194518432021141, 'timestamp': '2025-09-10 02:53:40.186435', 'step': 14699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:40.256031', 'step': 14699, 'epoch': 2} {'type': 'loss', 'content': 0.13225840032100677, 'timestamp': '2025-09-10 02:53:40.261667', 'step': 14700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:40.315740', 'step': 14700, 'epoch': 2} {'type': 'loss', 'content': 0.09514741599559784, 'timestamp': '2025-09-10 02:53:40.320814', 'step': 14701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:40.383789', 'step': 14701, 'epoch': 2} {'type': 'loss', 'content': 0.03958145156502724, 'timestamp': '2025-09-10 02:53:40.385798', 'step': 14702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:40.461709', 'step': 14702, 'epoch': 2} {'type': 'loss', 'content': 0.16684779524803162, 'timestamp': '2025-09-10 02:53:40.463816', 'step': 14703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:40.517940', 'step': 14703, 'epoch': 2} {'type': 'loss', 'content': 0.0810919851064682, 'timestamp': '2025-09-10 02:53:40.525684', 'step': 14704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:40.578776', 'step': 14704, 'epoch': 2} {'type': 'loss', 'content': 0.13505586981773376, 'timestamp': '2025-09-10 02:53:40.580701', 'step': 14705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:40.633641', 'step': 14705, 'epoch': 2} {'type': 'loss', 'content': 0.11918371915817261, 'timestamp': '2025-09-10 02:53:40.635502', 'step': 14706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:40.691393', 'step': 14706, 'epoch': 2} {'type': 'loss', 'content': 0.05698835104703903, 'timestamp': '2025-09-10 02:53:40.693596', 'step': 14707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:40.748908', 'step': 14707, 'epoch': 2} {'type': 'loss', 'content': 0.13005268573760986, 'timestamp': '2025-09-10 02:53:40.754581', 'step': 14708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:40.808003', 'step': 14708, 'epoch': 2} {'type': 'loss', 'content': 0.07756010442972183, 'timestamp': '2025-09-10 02:53:40.809806', 'step': 14709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:40.868727', 'step': 14709, 'epoch': 2} {'type': 'loss', 'content': 0.07186450809240341, 'timestamp': '2025-09-10 02:53:40.873287', 'step': 14710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:40.931711', 'step': 14710, 'epoch': 2} {'type': 'loss', 'content': 0.08117913454771042, 'timestamp': '2025-09-10 02:53:40.935418', 'step': 14711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:40.994387', 'step': 14711, 'epoch': 2} {'type': 'loss', 'content': 0.08684512972831726, 'timestamp': '2025-09-10 02:53:40.999986', 'step': 14712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:41.053771', 'step': 14712, 'epoch': 2} {'type': 'loss', 'content': 0.14241178333759308, 'timestamp': '2025-09-10 02:53:41.056058', 'step': 14713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:41.111190', 'step': 14713, 'epoch': 2} {'type': 'loss', 'content': 0.10681187361478806, 'timestamp': '2025-09-10 02:53:41.113114', 'step': 14714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:41.169301', 'step': 14714, 'epoch': 2} {'type': 'loss', 'content': 0.07156679779291153, 'timestamp': '2025-09-10 02:53:41.179601', 'step': 14715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:41.245555', 'step': 14715, 'epoch': 2} {'type': 'loss', 'content': 0.1737358719110489, 'timestamp': '2025-09-10 02:53:41.250934', 'step': 14716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:41.303343', 'step': 14716, 'epoch': 2} {'type': 'loss', 'content': 0.06714801490306854, 'timestamp': '2025-09-10 02:53:41.305415', 'step': 14717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:41.358679', 'step': 14717, 'epoch': 2} {'type': 'loss', 'content': 0.15002210438251495, 'timestamp': '2025-09-10 02:53:41.360436', 'step': 14718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:41.415496', 'step': 14718, 'epoch': 2} {'type': 'loss', 'content': 0.1613178700208664, 'timestamp': '2025-09-10 02:53:41.417354', 'step': 14719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:41.473849', 'step': 14719, 'epoch': 2} {'type': 'loss', 'content': 0.17937764525413513, 'timestamp': '2025-09-10 02:53:41.479343', 'step': 14720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:41.544446', 'step': 14720, 'epoch': 2} {'type': 'loss', 'content': 0.17704802751541138, 'timestamp': '2025-09-10 02:53:41.546420', 'step': 14721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:41.612779', 'step': 14721, 'epoch': 2} {'type': 'loss', 'content': 0.07946065068244934, 'timestamp': '2025-09-10 02:53:41.615008', 'step': 14722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:41.677328', 'step': 14722, 'epoch': 2} {'type': 'loss', 'content': 0.09431464225053787, 'timestamp': '2025-09-10 02:53:41.679294', 'step': 14723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:41.747240', 'step': 14723, 'epoch': 2} {'type': 'loss', 'content': 0.06818509846925735, 'timestamp': '2025-09-10 02:53:41.757654', 'step': 14724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:41.817992', 'step': 14724, 'epoch': 2} {'type': 'loss', 'content': 0.1959449052810669, 'timestamp': '2025-09-10 02:53:41.819979', 'step': 14725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:41.876181', 'step': 14725, 'epoch': 2} {'type': 'loss', 'content': 0.11027035117149353, 'timestamp': '2025-09-10 02:53:41.880017', 'step': 14726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:41.951186', 'step': 14726, 'epoch': 2} {'type': 'loss', 'content': 0.027103502303361893, 'timestamp': '2025-09-10 02:53:41.952962', 'step': 14727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:42.006492', 'step': 14727, 'epoch': 2} {'type': 'loss', 'content': 0.09203439205884933, 'timestamp': '2025-09-10 02:53:42.012081', 'step': 14728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:42.073287', 'step': 14728, 'epoch': 2} {'type': 'loss', 'content': 0.14442673325538635, 'timestamp': '2025-09-10 02:53:42.075025', 'step': 14729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:42.142418', 'step': 14729, 'epoch': 2} {'type': 'loss', 'content': 0.0935034453868866, 'timestamp': '2025-09-10 02:53:42.144183', 'step': 14730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:42.198461', 'step': 14730, 'epoch': 2} {'type': 'loss', 'content': 0.20276544988155365, 'timestamp': '2025-09-10 02:53:42.200370', 'step': 14731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:42.260813', 'step': 14731, 'epoch': 2} {'type': 'loss', 'content': 0.12033917009830475, 'timestamp': '2025-09-10 02:53:42.266341', 'step': 14732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:42.327820', 'step': 14732, 'epoch': 2} {'type': 'loss', 'content': 0.128065824508667, 'timestamp': '2025-09-10 02:53:42.333180', 'step': 14733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:42.400628', 'step': 14733, 'epoch': 2} {'type': 'loss', 'content': 0.09092075377702713, 'timestamp': '2025-09-10 02:53:42.402900', 'step': 14734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:42.494332', 'step': 14734, 'epoch': 2} {'type': 'loss', 'content': 0.057297252118587494, 'timestamp': '2025-09-10 02:53:42.496157', 'step': 14735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:42.569999', 'step': 14735, 'epoch': 2} {'type': 'loss', 'content': 0.14437316358089447, 'timestamp': '2025-09-10 02:53:42.577536', 'step': 14736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:42.687744', 'step': 14736, 'epoch': 2} {'type': 'loss', 'content': 0.06303386390209198, 'timestamp': '2025-09-10 02:53:42.689773', 'step': 14737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:42.778190', 'step': 14737, 'epoch': 2} {'type': 'loss', 'content': 0.16192519664764404, 'timestamp': '2025-09-10 02:53:42.780194', 'step': 14738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:42.855849', 'step': 14738, 'epoch': 2} {'type': 'loss', 'content': 0.09230867028236389, 'timestamp': '2025-09-10 02:53:42.858326', 'step': 14739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:42.921717', 'step': 14739, 'epoch': 2} {'type': 'loss', 'content': 0.12183287739753723, 'timestamp': '2025-09-10 02:53:42.928623', 'step': 14740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:42.986826', 'step': 14740, 'epoch': 2} {'type': 'loss', 'content': 0.20945174992084503, 'timestamp': '2025-09-10 02:53:42.989170', 'step': 14741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:43.043994', 'step': 14741, 'epoch': 2} {'type': 'loss', 'content': 0.08947545289993286, 'timestamp': '2025-09-10 02:53:43.050370', 'step': 14742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:43.115706', 'step': 14742, 'epoch': 2} {'type': 'loss', 'content': 0.1217465028166771, 'timestamp': '2025-09-10 02:53:43.117431', 'step': 14743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:43.172395', 'step': 14743, 'epoch': 2} {'type': 'loss', 'content': 0.011738664470613003, 'timestamp': '2025-09-10 02:53:43.182438', 'step': 14744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:43.257366', 'step': 14744, 'epoch': 2} {'type': 'loss', 'content': 0.03543715551495552, 'timestamp': '2025-09-10 02:53:43.259529', 'step': 14745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:43.316094', 'step': 14745, 'epoch': 2} {'type': 'loss', 'content': 0.10277791321277618, 'timestamp': '2025-09-10 02:53:43.318616', 'step': 14746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:43.379965', 'step': 14746, 'epoch': 2} {'type': 'loss', 'content': 0.09506460279226303, 'timestamp': '2025-09-10 02:53:43.382000', 'step': 14747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:43.438785', 'step': 14747, 'epoch': 2} {'type': 'loss', 'content': 0.11880487203598022, 'timestamp': '2025-09-10 02:53:43.444768', 'step': 14748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:43.500711', 'step': 14748, 'epoch': 2} {'type': 'loss', 'content': 0.05234520137310028, 'timestamp': '2025-09-10 02:53:43.504935', 'step': 14749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:43.560625', 'step': 14749, 'epoch': 2} {'type': 'loss', 'content': 0.14086425304412842, 'timestamp': '2025-09-10 02:53:43.562669', 'step': 14750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:43.624181', 'step': 14750, 'epoch': 2} {'type': 'loss', 'content': 0.08742626756429672, 'timestamp': '2025-09-10 02:53:43.625946', 'step': 14751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:43.681862', 'step': 14751, 'epoch': 2} {'type': 'loss', 'content': 0.1236792728304863, 'timestamp': '2025-09-10 02:53:43.689434', 'step': 14752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:43.743895', 'step': 14752, 'epoch': 2} {'type': 'loss', 'content': 0.11497478932142258, 'timestamp': '2025-09-10 02:53:43.745729', 'step': 14753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:43.800006', 'step': 14753, 'epoch': 2} {'type': 'loss', 'content': 0.08616138994693756, 'timestamp': '2025-09-10 02:53:43.804464', 'step': 14754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:43.857192', 'step': 14754, 'epoch': 2} {'type': 'loss', 'content': 0.11076045781373978, 'timestamp': '2025-09-10 02:53:43.859564', 'step': 14755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:43.917493', 'step': 14755, 'epoch': 2} {'type': 'loss', 'content': 0.07920829951763153, 'timestamp': '2025-09-10 02:53:43.923348', 'step': 14756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:43.978229', 'step': 14756, 'epoch': 2} {'type': 'loss', 'content': 0.10721626877784729, 'timestamp': '2025-09-10 02:53:43.985585', 'step': 14757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:44.050623', 'step': 14757, 'epoch': 2} {'type': 'loss', 'content': 0.16252872347831726, 'timestamp': '2025-09-10 02:53:44.053359', 'step': 14758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:44.112423', 'step': 14758, 'epoch': 2} {'type': 'loss', 'content': 0.05068521574139595, 'timestamp': '2025-09-10 02:53:44.114922', 'step': 14759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:44.176469', 'step': 14759, 'epoch': 2} {'type': 'loss', 'content': 0.07537973672151566, 'timestamp': '2025-09-10 02:53:44.182934', 'step': 14760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:44.257992', 'step': 14760, 'epoch': 2} {'type': 'loss', 'content': 0.1195329800248146, 'timestamp': '2025-09-10 02:53:44.260720', 'step': 14761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:44.327866', 'step': 14761, 'epoch': 2} {'type': 'loss', 'content': 0.11426161974668503, 'timestamp': '2025-09-10 02:53:44.330256', 'step': 14762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:44.387473', 'step': 14762, 'epoch': 2} {'type': 'loss', 'content': 0.11738347262144089, 'timestamp': '2025-09-10 02:53:44.389213', 'step': 14763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:44.456855', 'step': 14763, 'epoch': 2} {'type': 'loss', 'content': 0.0769597589969635, 'timestamp': '2025-09-10 02:53:44.462562', 'step': 14764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:44.534815', 'step': 14764, 'epoch': 2} {'type': 'loss', 'content': 0.11882924288511276, 'timestamp': '2025-09-10 02:53:44.536802', 'step': 14765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:44.593302', 'step': 14765, 'epoch': 2} {'type': 'loss', 'content': 0.14809808135032654, 'timestamp': '2025-09-10 02:53:44.598764', 'step': 14766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:44.674711', 'step': 14766, 'epoch': 2} {'type': 'loss', 'content': 0.1259942203760147, 'timestamp': '2025-09-10 02:53:44.680027', 'step': 14767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:44.786960', 'step': 14767, 'epoch': 2} {'type': 'loss', 'content': 0.1268811672925949, 'timestamp': '2025-09-10 02:53:44.792776', 'step': 14768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:44.861065', 'step': 14768, 'epoch': 2} {'type': 'loss', 'content': 0.12301459163427353, 'timestamp': '2025-09-10 02:53:44.864730', 'step': 14769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:44.931660', 'step': 14769, 'epoch': 2} {'type': 'loss', 'content': 0.13834232091903687, 'timestamp': '2025-09-10 02:53:44.933412', 'step': 14770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:45.008742', 'step': 14770, 'epoch': 2} {'type': 'loss', 'content': 0.10302034020423889, 'timestamp': '2025-09-10 02:53:45.010496', 'step': 14771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:45.083483', 'step': 14771, 'epoch': 2} {'type': 'loss', 'content': 0.1132374033331871, 'timestamp': '2025-09-10 02:53:45.089072', 'step': 14772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:45.160525', 'step': 14772, 'epoch': 2} {'type': 'loss', 'content': 0.15685170888900757, 'timestamp': '2025-09-10 02:53:45.163898', 'step': 14773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:45.224050', 'step': 14773, 'epoch': 2} {'type': 'loss', 'content': 0.16196319460868835, 'timestamp': '2025-09-10 02:53:45.225830', 'step': 14774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:45.280850', 'step': 14774, 'epoch': 2} {'type': 'loss', 'content': 0.14203812181949615, 'timestamp': '2025-09-10 02:53:45.285201', 'step': 14775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:45.350239', 'step': 14775, 'epoch': 2} {'type': 'loss', 'content': 0.08753811568021774, 'timestamp': '2025-09-10 02:53:45.355944', 'step': 14776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:45.411813', 'step': 14776, 'epoch': 2} {'type': 'loss', 'content': 0.14401118457317352, 'timestamp': '2025-09-10 02:53:45.414584', 'step': 14777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:45.472947', 'step': 14777, 'epoch': 2} {'type': 'loss', 'content': 0.17259851098060608, 'timestamp': '2025-09-10 02:53:45.474993', 'step': 14778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:45.534963', 'step': 14778, 'epoch': 2} {'type': 'loss', 'content': 0.1044580340385437, 'timestamp': '2025-09-10 02:53:45.536755', 'step': 14779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:45.590400', 'step': 14779, 'epoch': 2} {'type': 'loss', 'content': 0.16561681032180786, 'timestamp': '2025-09-10 02:53:45.596056', 'step': 14780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:45.652983', 'step': 14780, 'epoch': 2} {'type': 'loss', 'content': 0.2561471164226532, 'timestamp': '2025-09-10 02:53:45.657312', 'step': 14781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:45.715898', 'step': 14781, 'epoch': 2} {'type': 'loss', 'content': 0.1414416879415512, 'timestamp': '2025-09-10 02:53:45.718094', 'step': 14782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:45.793378', 'step': 14782, 'epoch': 2} {'type': 'loss', 'content': 0.09591149538755417, 'timestamp': '2025-09-10 02:53:45.796133', 'step': 14783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:45.877770', 'step': 14783, 'epoch': 2} {'type': 'loss', 'content': 0.0793619230389595, 'timestamp': '2025-09-10 02:53:45.883608', 'step': 14784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:45.949536', 'step': 14784, 'epoch': 2} {'type': 'loss', 'content': 0.1176767572760582, 'timestamp': '2025-09-10 02:53:45.953670', 'step': 14785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:46.030977', 'step': 14785, 'epoch': 2} {'type': 'loss', 'content': 0.1264287680387497, 'timestamp': '2025-09-10 02:53:46.032739', 'step': 14786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:46.105513', 'step': 14786, 'epoch': 2} {'type': 'loss', 'content': 0.08623212575912476, 'timestamp': '2025-09-10 02:53:46.108199', 'step': 14787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:46.165297', 'step': 14787, 'epoch': 2} {'type': 'loss', 'content': 0.1057843491435051, 'timestamp': '2025-09-10 02:53:46.171299', 'step': 14788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:46.243730', 'step': 14788, 'epoch': 2} {'type': 'loss', 'content': 0.07980918139219284, 'timestamp': '2025-09-10 02:53:46.245872', 'step': 14789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:46.321239', 'step': 14789, 'epoch': 2} {'type': 'loss', 'content': 0.16664597392082214, 'timestamp': '2025-09-10 02:53:46.322999', 'step': 14790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:46.411047', 'step': 14790, 'epoch': 2} {'type': 'loss', 'content': 0.07528342306613922, 'timestamp': '2025-09-10 02:53:46.412976', 'step': 14791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:46.484817', 'step': 14791, 'epoch': 2} {'type': 'loss', 'content': 0.12730196118354797, 'timestamp': '2025-09-10 02:53:46.490516', 'step': 14792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:46.576433', 'step': 14792, 'epoch': 2} {'type': 'loss', 'content': 0.12746185064315796, 'timestamp': '2025-09-10 02:53:46.578292', 'step': 14793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:46.646556', 'step': 14793, 'epoch': 2} {'type': 'loss', 'content': 0.06918572634458542, 'timestamp': '2025-09-10 02:53:46.648552', 'step': 14794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:46.703456', 'step': 14794, 'epoch': 2} {'type': 'loss', 'content': 0.08513525873422623, 'timestamp': '2025-09-10 02:53:46.705196', 'step': 14795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:46.774656', 'step': 14795, 'epoch': 2} {'type': 'loss', 'content': 0.12067460268735886, 'timestamp': '2025-09-10 02:53:46.781139', 'step': 14796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:46.847405', 'step': 14796, 'epoch': 2} {'type': 'loss', 'content': 0.20638614892959595, 'timestamp': '2025-09-10 02:53:46.852672', 'step': 14797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:46.918441', 'step': 14797, 'epoch': 2} {'type': 'loss', 'content': 0.06577379256486893, 'timestamp': '2025-09-10 02:53:46.920729', 'step': 14798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:46.991217', 'step': 14798, 'epoch': 2} {'type': 'loss', 'content': 0.1617511659860611, 'timestamp': '2025-09-10 02:53:46.993325', 'step': 14799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:47.046780', 'step': 14799, 'epoch': 2} {'type': 'loss', 'content': 0.11859080195426941, 'timestamp': '2025-09-10 02:53:47.052838', 'step': 14800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:47.108664', 'step': 14800, 'epoch': 2} {'type': 'loss', 'content': 0.1613510400056839, 'timestamp': '2025-09-10 02:53:47.111122', 'step': 14801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:47.172794', 'step': 14801, 'epoch': 2} {'type': 'loss', 'content': 0.17043671011924744, 'timestamp': '2025-09-10 02:53:47.175808', 'step': 14802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:47.235156', 'step': 14802, 'epoch': 2} {'type': 'loss', 'content': 0.06681349873542786, 'timestamp': '2025-09-10 02:53:47.237264', 'step': 14803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:47.305547', 'step': 14803, 'epoch': 2} {'type': 'loss', 'content': 0.19827888906002045, 'timestamp': '2025-09-10 02:53:47.311545', 'step': 14804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:47.378275', 'step': 14804, 'epoch': 2} {'type': 'loss', 'content': 0.09727207571268082, 'timestamp': '2025-09-10 02:53:47.380777', 'step': 14805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:47.447527', 'step': 14805, 'epoch': 2} {'type': 'loss', 'content': 0.14379343390464783, 'timestamp': '2025-09-10 02:53:47.449793', 'step': 14806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:47.512392', 'step': 14806, 'epoch': 2} {'type': 'loss', 'content': 0.07922517508268356, 'timestamp': '2025-09-10 02:53:47.518867', 'step': 14807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:47.579254', 'step': 14807, 'epoch': 2} {'type': 'loss', 'content': 0.09329482167959213, 'timestamp': '2025-09-10 02:53:47.584909', 'step': 14808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:47.649751', 'step': 14808, 'epoch': 2} {'type': 'loss', 'content': 0.04338820278644562, 'timestamp': '2025-09-10 02:53:47.651866', 'step': 14809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:47.712680', 'step': 14809, 'epoch': 2} {'type': 'loss', 'content': 0.0873861163854599, 'timestamp': '2025-09-10 02:53:47.714527', 'step': 14810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:47.773837', 'step': 14810, 'epoch': 2} {'type': 'loss', 'content': 0.0982995554804802, 'timestamp': '2025-09-10 02:53:47.775994', 'step': 14811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:47.841128', 'step': 14811, 'epoch': 2} {'type': 'loss', 'content': 0.13073095679283142, 'timestamp': '2025-09-10 02:53:47.847158', 'step': 14812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:47.905151', 'step': 14812, 'epoch': 2} {'type': 'loss', 'content': 0.11517320573329926, 'timestamp': '2025-09-10 02:53:47.907242', 'step': 14813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:47.990420', 'step': 14813, 'epoch': 2} {'type': 'loss', 'content': 0.13039030134677887, 'timestamp': '2025-09-10 02:53:47.999320', 'step': 14814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:48.099474', 'step': 14814, 'epoch': 2} {'type': 'loss', 'content': 0.1620543897151947, 'timestamp': '2025-09-10 02:53:48.102583', 'step': 14815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:53:48.175696', 'step': 14815, 'epoch': 2} {'type': 'loss', 'content': 0.08777327090501785, 'timestamp': '2025-09-10 02:53:48.181424', 'step': 14816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:48.253730', 'step': 14816, 'epoch': 2} {'type': 'loss', 'content': 0.18528681993484497, 'timestamp': '2025-09-10 02:53:48.255475', 'step': 14817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:48.342650', 'step': 14817, 'epoch': 2} {'type': 'loss', 'content': 0.05841200798749924, 'timestamp': '2025-09-10 02:53:48.344631', 'step': 14818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:48.425915', 'step': 14818, 'epoch': 2} {'type': 'loss', 'content': 0.11790762096643448, 'timestamp': '2025-09-10 02:53:48.427681', 'step': 14819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:48.492987', 'step': 14819, 'epoch': 2} {'type': 'loss', 'content': 0.13561266660690308, 'timestamp': '2025-09-10 02:53:48.499498', 'step': 14820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:48.570989', 'step': 14820, 'epoch': 2} {'type': 'loss', 'content': 0.1304500848054886, 'timestamp': '2025-09-10 02:53:48.575128', 'step': 14821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:48.633890', 'step': 14821, 'epoch': 2} {'type': 'loss', 'content': 0.09309572726488113, 'timestamp': '2025-09-10 02:53:48.636025', 'step': 14822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:48.691447', 'step': 14822, 'epoch': 2} {'type': 'loss', 'content': 0.0435110405087471, 'timestamp': '2025-09-10 02:53:48.693757', 'step': 14823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:48.749451', 'step': 14823, 'epoch': 2} {'type': 'loss', 'content': 0.14000698924064636, 'timestamp': '2025-09-10 02:53:48.754908', 'step': 14824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:48.812749', 'step': 14824, 'epoch': 2} {'type': 'loss', 'content': 0.1712273806333542, 'timestamp': '2025-09-10 02:53:48.814950', 'step': 14825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:48.874856', 'step': 14825, 'epoch': 2} {'type': 'loss', 'content': 0.04430146887898445, 'timestamp': '2025-09-10 02:53:48.876710', 'step': 14826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:48.934333', 'step': 14826, 'epoch': 2} {'type': 'loss', 'content': 0.05514843016862869, 'timestamp': '2025-09-10 02:53:48.936536', 'step': 14827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:48.992522', 'step': 14827, 'epoch': 2} {'type': 'loss', 'content': 0.18134503066539764, 'timestamp': '2025-09-10 02:53:48.998421', 'step': 14828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:49.060450', 'step': 14828, 'epoch': 2} {'type': 'loss', 'content': 0.11670712381601334, 'timestamp': '2025-09-10 02:53:49.063797', 'step': 14829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:49.119557', 'step': 14829, 'epoch': 2} {'type': 'loss', 'content': 0.11835388094186783, 'timestamp': '2025-09-10 02:53:49.121355', 'step': 14830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:49.177956', 'step': 14830, 'epoch': 2} {'type': 'loss', 'content': 0.09597074240446091, 'timestamp': '2025-09-10 02:53:49.179994', 'step': 14831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:49.233840', 'step': 14831, 'epoch': 2} {'type': 'loss', 'content': 0.12568551301956177, 'timestamp': '2025-09-10 02:53:49.239629', 'step': 14832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:49.293525', 'step': 14832, 'epoch': 2} {'type': 'loss', 'content': 0.14341647922992706, 'timestamp': '2025-09-10 02:53:49.295351', 'step': 14833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:49.354415', 'step': 14833, 'epoch': 2} {'type': 'loss', 'content': 0.13428887724876404, 'timestamp': '2025-09-10 02:53:49.356446', 'step': 14834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:49.417816', 'step': 14834, 'epoch': 2} {'type': 'loss', 'content': 0.10852041840553284, 'timestamp': '2025-09-10 02:53:49.419936', 'step': 14835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:49.473292', 'step': 14835, 'epoch': 2} {'type': 'loss', 'content': 0.16142971813678741, 'timestamp': '2025-09-10 02:53:49.478951', 'step': 14836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:49.549793', 'step': 14836, 'epoch': 2} {'type': 'loss', 'content': 0.1324661374092102, 'timestamp': '2025-09-10 02:53:49.554205', 'step': 14837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:49.626075', 'step': 14837, 'epoch': 2} {'type': 'loss', 'content': 0.06875859200954437, 'timestamp': '2025-09-10 02:53:49.627944', 'step': 14838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:49.687815', 'step': 14838, 'epoch': 2} {'type': 'loss', 'content': 0.08966399729251862, 'timestamp': '2025-09-10 02:53:49.690427', 'step': 14839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:49.751281', 'step': 14839, 'epoch': 2} {'type': 'loss', 'content': 0.126039057970047, 'timestamp': '2025-09-10 02:53:49.757553', 'step': 14840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:49.819210', 'step': 14840, 'epoch': 2} {'type': 'loss', 'content': 0.1324799656867981, 'timestamp': '2025-09-10 02:53:49.822994', 'step': 14841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:49.893237', 'step': 14841, 'epoch': 2} {'type': 'loss', 'content': 0.13279378414154053, 'timestamp': '2025-09-10 02:53:49.899041', 'step': 14842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:49.969114', 'step': 14842, 'epoch': 2} {'type': 'loss', 'content': 0.14549891650676727, 'timestamp': '2025-09-10 02:53:49.970995', 'step': 14843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:50.024787', 'step': 14843, 'epoch': 2} {'type': 'loss', 'content': 0.12681922316551208, 'timestamp': '2025-09-10 02:53:50.030383', 'step': 14844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:50.084170', 'step': 14844, 'epoch': 2} {'type': 'loss', 'content': 0.19417861104011536, 'timestamp': '2025-09-10 02:53:50.086064', 'step': 14845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:50.142806', 'step': 14845, 'epoch': 2} {'type': 'loss', 'content': 0.08476448059082031, 'timestamp': '2025-09-10 02:53:50.144855', 'step': 14846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:50.202715', 'step': 14846, 'epoch': 2} {'type': 'loss', 'content': 0.04943237826228142, 'timestamp': '2025-09-10 02:53:50.204904', 'step': 14847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:50.265672', 'step': 14847, 'epoch': 2} {'type': 'loss', 'content': 0.0820060521364212, 'timestamp': '2025-09-10 02:53:50.273272', 'step': 14848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:50.327769', 'step': 14848, 'epoch': 2} {'type': 'loss', 'content': 0.1667664498090744, 'timestamp': '2025-09-10 02:53:50.329613', 'step': 14849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:50.383860', 'step': 14849, 'epoch': 2} {'type': 'loss', 'content': 0.06445902585983276, 'timestamp': '2025-09-10 02:53:50.387130', 'step': 14850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:50.444673', 'step': 14850, 'epoch': 2} {'type': 'loss', 'content': 0.16159242391586304, 'timestamp': '2025-09-10 02:53:50.446500', 'step': 14851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:50.500727', 'step': 14851, 'epoch': 2} {'type': 'loss', 'content': 0.12796223163604736, 'timestamp': '2025-09-10 02:53:50.506377', 'step': 14852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:50.562910', 'step': 14852, 'epoch': 2} {'type': 'loss', 'content': 0.17308546602725983, 'timestamp': '2025-09-10 02:53:50.564844', 'step': 14853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:50.623082', 'step': 14853, 'epoch': 2} {'type': 'loss', 'content': 0.14220739901065826, 'timestamp': '2025-09-10 02:53:50.625293', 'step': 14854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:50.680926', 'step': 14854, 'epoch': 2} {'type': 'loss', 'content': 0.10086485743522644, 'timestamp': '2025-09-10 02:53:50.683224', 'step': 14855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:50.743080', 'step': 14855, 'epoch': 2} {'type': 'loss', 'content': 0.09954655915498734, 'timestamp': '2025-09-10 02:53:50.750416', 'step': 14856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:50.810001', 'step': 14856, 'epoch': 2} {'type': 'loss', 'content': 0.08658062666654587, 'timestamp': '2025-09-10 02:53:50.811998', 'step': 14857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:50.868346', 'step': 14857, 'epoch': 2} {'type': 'loss', 'content': 0.06490907073020935, 'timestamp': '2025-09-10 02:53:50.870409', 'step': 14858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:50.939476', 'step': 14858, 'epoch': 2} {'type': 'loss', 'content': 0.08264822512865067, 'timestamp': '2025-09-10 02:53:50.941190', 'step': 14859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:51.010127', 'step': 14859, 'epoch': 2} {'type': 'loss', 'content': 0.07915611565113068, 'timestamp': '2025-09-10 02:53:51.015754', 'step': 14860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:51.087312', 'step': 14860, 'epoch': 2} {'type': 'loss', 'content': 0.19512571394443512, 'timestamp': '2025-09-10 02:53:51.090252', 'step': 14861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:51.161299', 'step': 14861, 'epoch': 2} {'type': 'loss', 'content': 0.11974627524614334, 'timestamp': '2025-09-10 02:53:51.162898', 'step': 14862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:51.221027', 'step': 14862, 'epoch': 2} {'type': 'loss', 'content': 0.13263465464115143, 'timestamp': '2025-09-10 02:53:51.223100', 'step': 14863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:51.311768', 'step': 14863, 'epoch': 2} {'type': 'loss', 'content': 0.1409156322479248, 'timestamp': '2025-09-10 02:53:51.317536', 'step': 14864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:51.381885', 'step': 14864, 'epoch': 2} {'type': 'loss', 'content': 0.07971936464309692, 'timestamp': '2025-09-10 02:53:51.383872', 'step': 14865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:51.441934', 'step': 14865, 'epoch': 2} {'type': 'loss', 'content': 0.1249288022518158, 'timestamp': '2025-09-10 02:53:51.443803', 'step': 14866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:51.517236', 'step': 14866, 'epoch': 2} {'type': 'loss', 'content': 0.1091923639178276, 'timestamp': '2025-09-10 02:53:51.518887', 'step': 14867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:51.583436', 'step': 14867, 'epoch': 2} {'type': 'loss', 'content': 0.2844483554363251, 'timestamp': '2025-09-10 02:53:51.589323', 'step': 14868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:51.645165', 'step': 14868, 'epoch': 2} {'type': 'loss', 'content': 0.18379180133342743, 'timestamp': '2025-09-10 02:53:51.646988', 'step': 14869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:51.709608', 'step': 14869, 'epoch': 2} {'type': 'loss', 'content': 0.08043303340673447, 'timestamp': '2025-09-10 02:53:51.711840', 'step': 14870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:51.768623', 'step': 14870, 'epoch': 2} {'type': 'loss', 'content': 0.046100273728370667, 'timestamp': '2025-09-10 02:53:51.770638', 'step': 14871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:51.828437', 'step': 14871, 'epoch': 2} {'type': 'loss', 'content': 0.08007098734378815, 'timestamp': '2025-09-10 02:53:51.834992', 'step': 14872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:53:51.888920', 'step': 14872, 'epoch': 2} {'type': 'loss', 'content': 0.1404489129781723, 'timestamp': '2025-09-10 02:53:51.890792', 'step': 14873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:51.946455', 'step': 14873, 'epoch': 2} {'type': 'loss', 'content': 0.04740885645151138, 'timestamp': '2025-09-10 02:53:51.948222', 'step': 14874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:52.001682', 'step': 14874, 'epoch': 2} {'type': 'loss', 'content': 0.04583727568387985, 'timestamp': '2025-09-10 02:53:52.003681', 'step': 14875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:52.063317', 'step': 14875, 'epoch': 2} {'type': 'loss', 'content': 0.07674061506986618, 'timestamp': '2025-09-10 02:53:52.069047', 'step': 14876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:53:52.125732', 'step': 14876, 'epoch': 2} {'type': 'loss', 'content': 0.2104988843202591, 'timestamp': '2025-09-10 02:53:52.127509', 'step': 14877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:52.182358', 'step': 14877, 'epoch': 2} {'type': 'loss', 'content': 0.05873197689652443, 'timestamp': '2025-09-10 02:53:52.184355', 'step': 14878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:52.237884', 'step': 14878, 'epoch': 2} {'type': 'loss', 'content': 0.1112295389175415, 'timestamp': '2025-09-10 02:53:52.239946', 'step': 14879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:52.306288', 'step': 14879, 'epoch': 2} {'type': 'loss', 'content': 0.10834550112485886, 'timestamp': '2025-09-10 02:53:52.311902', 'step': 14880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:52.364798', 'step': 14880, 'epoch': 2} {'type': 'loss', 'content': 0.042373839765787125, 'timestamp': '2025-09-10 02:53:52.366644', 'step': 14881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:52.420884', 'step': 14881, 'epoch': 2} {'type': 'loss', 'content': 0.09208494424819946, 'timestamp': '2025-09-10 02:53:52.422568', 'step': 14882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:52.477206', 'step': 14882, 'epoch': 2} {'type': 'loss', 'content': 0.12988626956939697, 'timestamp': '2025-09-10 02:53:52.479229', 'step': 14883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:52.532025', 'step': 14883, 'epoch': 2} {'type': 'loss', 'content': 0.06349518895149231, 'timestamp': '2025-09-10 02:53:52.537640', 'step': 14884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:52.591904', 'step': 14884, 'epoch': 2} {'type': 'loss', 'content': 0.16066457331180573, 'timestamp': '2025-09-10 02:53:52.593830', 'step': 14885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:52.649679', 'step': 14885, 'epoch': 2} {'type': 'loss', 'content': 0.06495217978954315, 'timestamp': '2025-09-10 02:53:52.651604', 'step': 14886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:52.705010', 'step': 14886, 'epoch': 2} {'type': 'loss', 'content': 0.07899007946252823, 'timestamp': '2025-09-10 02:53:52.706965', 'step': 14887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:52.761971', 'step': 14887, 'epoch': 2} {'type': 'loss', 'content': 0.063437819480896, 'timestamp': '2025-09-10 02:53:52.768043', 'step': 14888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:52.821358', 'step': 14888, 'epoch': 2} {'type': 'loss', 'content': 0.14462856948375702, 'timestamp': '2025-09-10 02:53:52.823034', 'step': 14889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:52.876155', 'step': 14889, 'epoch': 2} {'type': 'loss', 'content': 0.1003589779138565, 'timestamp': '2025-09-10 02:53:52.877975', 'step': 14890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:52.935394', 'step': 14890, 'epoch': 2} {'type': 'loss', 'content': 0.1328165978193283, 'timestamp': '2025-09-10 02:53:52.937222', 'step': 14891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:52.990958', 'step': 14891, 'epoch': 2} {'type': 'loss', 'content': 0.16088970005512238, 'timestamp': '2025-09-10 02:53:52.996884', 'step': 14892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:53.053201', 'step': 14892, 'epoch': 2} {'type': 'loss', 'content': 0.07430503517389297, 'timestamp': '2025-09-10 02:53:53.055282', 'step': 14893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:53.112357', 'step': 14893, 'epoch': 2} {'type': 'loss', 'content': 0.1098284125328064, 'timestamp': '2025-09-10 02:53:53.114302', 'step': 14894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:53.168366', 'step': 14894, 'epoch': 2} {'type': 'loss', 'content': 0.11730366200208664, 'timestamp': '2025-09-10 02:53:53.170142', 'step': 14895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:53.227534', 'step': 14895, 'epoch': 2} {'type': 'loss', 'content': 0.07350371032953262, 'timestamp': '2025-09-10 02:53:53.233367', 'step': 14896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:53.288872', 'step': 14896, 'epoch': 2} {'type': 'loss', 'content': 0.16196973621845245, 'timestamp': '2025-09-10 02:53:53.291105', 'step': 14897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:53.347928', 'step': 14897, 'epoch': 2} {'type': 'loss', 'content': 0.03560672700405121, 'timestamp': '2025-09-10 02:53:53.349927', 'step': 14898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:53.406459', 'step': 14898, 'epoch': 2} {'type': 'loss', 'content': 0.09547895193099976, 'timestamp': '2025-09-10 02:53:53.408404', 'step': 14899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:53.463109', 'step': 14899, 'epoch': 2} {'type': 'loss', 'content': 0.19094009697437286, 'timestamp': '2025-09-10 02:53:53.469189', 'step': 14900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:53.524243', 'step': 14900, 'epoch': 2} {'type': 'loss', 'content': 0.10324662178754807, 'timestamp': '2025-09-10 02:53:53.526256', 'step': 14901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:53.580369', 'step': 14901, 'epoch': 2} {'type': 'loss', 'content': 0.10021118819713593, 'timestamp': '2025-09-10 02:53:53.582361', 'step': 14902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:53.637638', 'step': 14902, 'epoch': 2} {'type': 'loss', 'content': 0.19628389179706573, 'timestamp': '2025-09-10 02:53:53.639589', 'step': 14903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:53.694547', 'step': 14903, 'epoch': 2} {'type': 'loss', 'content': 0.039558008313179016, 'timestamp': '2025-09-10 02:53:53.700536', 'step': 14904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:53.757341', 'step': 14904, 'epoch': 2} {'type': 'loss', 'content': 0.09918700158596039, 'timestamp': '2025-09-10 02:53:53.759327', 'step': 14905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:53.816067', 'step': 14905, 'epoch': 2} {'type': 'loss', 'content': 0.09531596302986145, 'timestamp': '2025-09-10 02:53:53.817810', 'step': 14906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:53.873741', 'step': 14906, 'epoch': 2} {'type': 'loss', 'content': 0.2206449806690216, 'timestamp': '2025-09-10 02:53:53.875758', 'step': 14907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:53.930633', 'step': 14907, 'epoch': 2} {'type': 'loss', 'content': 0.1629001796245575, 'timestamp': '2025-09-10 02:53:53.936766', 'step': 14908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:53.991982', 'step': 14908, 'epoch': 2} {'type': 'loss', 'content': 0.09749605506658554, 'timestamp': '2025-09-10 02:53:53.993653', 'step': 14909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:54.050767', 'step': 14909, 'epoch': 2} {'type': 'loss', 'content': 0.09201379120349884, 'timestamp': '2025-09-10 02:53:54.052741', 'step': 14910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:53:54.112173', 'step': 14910, 'epoch': 2} {'type': 'loss', 'content': 0.12732450664043427, 'timestamp': '2025-09-10 02:53:54.114100', 'step': 14911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:53:54.174577', 'step': 14911, 'epoch': 2} {'type': 'loss', 'content': 0.054365124553442, 'timestamp': '2025-09-10 02:53:54.180538', 'step': 14912, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:54:08.340246', 'step': 14912, 'epoch': 2} {'type': 'pplx', 'content': 12915.234898587622, 'timestamp': '2025-09-10 02:54:08.343206', 'step': 14912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:08.397272', 'step': 14912, 'epoch': 2} {'type': 'loss', 'content': 0.0944051519036293, 'timestamp': '2025-09-10 02:54:08.399425', 'step': 14913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:08.454109', 'step': 14913, 'epoch': 2} {'type': 'loss', 'content': 0.04148948937654495, 'timestamp': '2025-09-10 02:54:08.455979', 'step': 14914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:08.510344', 'step': 14914, 'epoch': 2} {'type': 'loss', 'content': 0.3412366509437561, 'timestamp': '2025-09-10 02:54:08.512352', 'step': 14915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:08.565491', 'step': 14915, 'epoch': 2} {'type': 'loss', 'content': 0.14310508966445923, 'timestamp': '2025-09-10 02:54:08.571387', 'step': 14916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:08.625462', 'step': 14916, 'epoch': 2} {'type': 'loss', 'content': 0.12339933216571808, 'timestamp': '2025-09-10 02:54:08.627535', 'step': 14917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:08.680794', 'step': 14917, 'epoch': 2} {'type': 'loss', 'content': 0.08081404119729996, 'timestamp': '2025-09-10 02:54:08.682747', 'step': 14918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:08.736319', 'step': 14918, 'epoch': 2} {'type': 'loss', 'content': 0.05937718227505684, 'timestamp': '2025-09-10 02:54:08.738321', 'step': 14919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 208], 'flops': 1040006410960.0}, 'timestamp': '2025-09-10 02:54:08.791641', 'step': 14919, 'epoch': 2} {'type': 'loss', 'content': 0.22422584891319275, 'timestamp': '2025-09-10 02:54:08.797575', 'step': 14920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:08.851889', 'step': 14920, 'epoch': 3} {'type': 'loss', 'content': 0.03427399694919586, 'timestamp': '2025-09-10 02:54:08.853974', 'step': 14921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:08.907155', 'step': 14921, 'epoch': 3} {'type': 'loss', 'content': 0.09499628096818924, 'timestamp': '2025-09-10 02:54:08.909182', 'step': 14922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:08.962672', 'step': 14922, 'epoch': 3} {'type': 'loss', 'content': 0.08939079195261002, 'timestamp': '2025-09-10 02:54:08.964754', 'step': 14923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:09.018485', 'step': 14923, 'epoch': 3} {'type': 'loss', 'content': 0.11936279386281967, 'timestamp': '2025-09-10 02:54:09.024607', 'step': 14924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:09.076674', 'step': 14924, 'epoch': 3} {'type': 'loss', 'content': 0.15499292314052582, 'timestamp': '2025-09-10 02:54:09.078619', 'step': 14925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:09.131909', 'step': 14925, 'epoch': 3} {'type': 'loss', 'content': 0.06173287704586983, 'timestamp': '2025-09-10 02:54:09.134169', 'step': 14926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:09.187633', 'step': 14926, 'epoch': 3} {'type': 'loss', 'content': 0.16328833997249603, 'timestamp': '2025-09-10 02:54:09.189584', 'step': 14927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:09.242293', 'step': 14927, 'epoch': 3} {'type': 'loss', 'content': 0.036850783973932266, 'timestamp': '2025-09-10 02:54:09.248161', 'step': 14928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:09.301271', 'step': 14928, 'epoch': 3} {'type': 'loss', 'content': 0.09496788680553436, 'timestamp': '2025-09-10 02:54:09.303327', 'step': 14929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:09.356950', 'step': 14929, 'epoch': 3} {'type': 'loss', 'content': 0.09902075678110123, 'timestamp': '2025-09-10 02:54:09.358938', 'step': 14930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:09.412854', 'step': 14930, 'epoch': 3} {'type': 'loss', 'content': 0.05496012419462204, 'timestamp': '2025-09-10 02:54:09.415186', 'step': 14931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:09.470851', 'step': 14931, 'epoch': 3} {'type': 'loss', 'content': 0.13702787458896637, 'timestamp': '2025-09-10 02:54:09.476950', 'step': 14932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:09.530550', 'step': 14932, 'epoch': 3} {'type': 'loss', 'content': 0.10471197217702866, 'timestamp': '2025-09-10 02:54:09.532615', 'step': 14933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:09.588045', 'step': 14933, 'epoch': 3} {'type': 'loss', 'content': 0.07088273018598557, 'timestamp': '2025-09-10 02:54:09.590092', 'step': 14934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:09.643854', 'step': 14934, 'epoch': 3} {'type': 'loss', 'content': 0.12026958167552948, 'timestamp': '2025-09-10 02:54:09.646170', 'step': 14935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:09.699253', 'step': 14935, 'epoch': 3} {'type': 'loss', 'content': 0.18131791055202484, 'timestamp': '2025-09-10 02:54:09.705351', 'step': 14936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:09.757770', 'step': 14936, 'epoch': 3} {'type': 'loss', 'content': 0.03775695338845253, 'timestamp': '2025-09-10 02:54:09.759863', 'step': 14937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:09.813668', 'step': 14937, 'epoch': 3} {'type': 'loss', 'content': 0.10540634393692017, 'timestamp': '2025-09-10 02:54:09.815770', 'step': 14938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:09.869268', 'step': 14938, 'epoch': 3} {'type': 'loss', 'content': 0.16012269258499146, 'timestamp': '2025-09-10 02:54:09.871621', 'step': 14939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:09.925091', 'step': 14939, 'epoch': 3} {'type': 'loss', 'content': 0.05044950917363167, 'timestamp': '2025-09-10 02:54:09.930983', 'step': 14940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:09.983457', 'step': 14940, 'epoch': 3} {'type': 'loss', 'content': 0.062424320727586746, 'timestamp': '2025-09-10 02:54:09.985447', 'step': 14941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:10.039174', 'step': 14941, 'epoch': 3} {'type': 'loss', 'content': 0.10911248624324799, 'timestamp': '2025-09-10 02:54:10.041092', 'step': 14942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:10.094800', 'step': 14942, 'epoch': 3} {'type': 'loss', 'content': 0.025958258658647537, 'timestamp': '2025-09-10 02:54:10.096747', 'step': 14943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:10.151908', 'step': 14943, 'epoch': 3} {'type': 'loss', 'content': 0.1179327443242073, 'timestamp': '2025-09-10 02:54:10.157579', 'step': 14944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:10.210311', 'step': 14944, 'epoch': 3} {'type': 'loss', 'content': 0.053402889519929886, 'timestamp': '2025-09-10 02:54:10.212410', 'step': 14945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:10.265624', 'step': 14945, 'epoch': 3} {'type': 'loss', 'content': 0.1338808834552765, 'timestamp': '2025-09-10 02:54:10.267735', 'step': 14946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:10.321898', 'step': 14946, 'epoch': 3} {'type': 'loss', 'content': 0.14579470455646515, 'timestamp': '2025-09-10 02:54:10.323847', 'step': 14947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:10.376950', 'step': 14947, 'epoch': 3} {'type': 'loss', 'content': 0.0758657306432724, 'timestamp': '2025-09-10 02:54:10.382525', 'step': 14948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:10.435256', 'step': 14948, 'epoch': 3} {'type': 'loss', 'content': 0.11476584523916245, 'timestamp': '2025-09-10 02:54:10.437583', 'step': 14949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:10.490377', 'step': 14949, 'epoch': 3} {'type': 'loss', 'content': 0.046991586685180664, 'timestamp': '2025-09-10 02:54:10.492479', 'step': 14950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:10.546539', 'step': 14950, 'epoch': 3} {'type': 'loss', 'content': 0.1139754056930542, 'timestamp': '2025-09-10 02:54:10.548635', 'step': 14951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:10.603834', 'step': 14951, 'epoch': 3} {'type': 'loss', 'content': 0.0475672222673893, 'timestamp': '2025-09-10 02:54:10.610168', 'step': 14952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:10.666129', 'step': 14952, 'epoch': 3} {'type': 'loss', 'content': 0.1689613312482834, 'timestamp': '2025-09-10 02:54:10.668130', 'step': 14953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:10.722378', 'step': 14953, 'epoch': 3} {'type': 'loss', 'content': 0.10309738665819168, 'timestamp': '2025-09-10 02:54:10.724460', 'step': 14954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:10.777696', 'step': 14954, 'epoch': 3} {'type': 'loss', 'content': 0.1613984853029251, 'timestamp': '2025-09-10 02:54:10.779719', 'step': 14955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:10.833374', 'step': 14955, 'epoch': 3} {'type': 'loss', 'content': 0.046136509627103806, 'timestamp': '2025-09-10 02:54:10.838970', 'step': 14956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:10.891718', 'step': 14956, 'epoch': 3} {'type': 'loss', 'content': 0.06987274438142776, 'timestamp': '2025-09-10 02:54:10.893720', 'step': 14957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:10.947617', 'step': 14957, 'epoch': 3} {'type': 'loss', 'content': 0.0883234366774559, 'timestamp': '2025-09-10 02:54:10.949526', 'step': 14958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:11.003175', 'step': 14958, 'epoch': 3} {'type': 'loss', 'content': 0.13156341016292572, 'timestamp': '2025-09-10 02:54:11.005304', 'step': 14959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:11.058737', 'step': 14959, 'epoch': 3} {'type': 'loss', 'content': 0.1059679165482521, 'timestamp': '2025-09-10 02:54:11.064485', 'step': 14960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:11.117732', 'step': 14960, 'epoch': 3} {'type': 'loss', 'content': 0.07352027297019958, 'timestamp': '2025-09-10 02:54:11.120066', 'step': 14961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:11.174482', 'step': 14961, 'epoch': 3} {'type': 'loss', 'content': 0.12358060479164124, 'timestamp': '2025-09-10 02:54:11.176490', 'step': 14962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:11.230388', 'step': 14962, 'epoch': 3} {'type': 'loss', 'content': 0.1415865123271942, 'timestamp': '2025-09-10 02:54:11.232645', 'step': 14963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:11.288419', 'step': 14963, 'epoch': 3} {'type': 'loss', 'content': 0.12563858926296234, 'timestamp': '2025-09-10 02:54:11.294379', 'step': 14964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:11.348683', 'step': 14964, 'epoch': 3} {'type': 'loss', 'content': 0.15488885343074799, 'timestamp': '2025-09-10 02:54:11.350881', 'step': 14965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:11.405834', 'step': 14965, 'epoch': 3} {'type': 'loss', 'content': 0.09043562412261963, 'timestamp': '2025-09-10 02:54:11.407986', 'step': 14966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:11.461686', 'step': 14966, 'epoch': 3} {'type': 'loss', 'content': 0.12024915218353271, 'timestamp': '2025-09-10 02:54:11.463390', 'step': 14967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:11.517030', 'step': 14967, 'epoch': 3} {'type': 'loss', 'content': 0.14791138470172882, 'timestamp': '2025-09-10 02:54:11.522599', 'step': 14968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:11.574865', 'step': 14968, 'epoch': 3} {'type': 'loss', 'content': 0.13257361948490143, 'timestamp': '2025-09-10 02:54:11.576755', 'step': 14969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:11.630772', 'step': 14969, 'epoch': 3} {'type': 'loss', 'content': 0.07036878168582916, 'timestamp': '2025-09-10 02:54:11.632955', 'step': 14970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:11.687448', 'step': 14970, 'epoch': 3} {'type': 'loss', 'content': 0.12486882507801056, 'timestamp': '2025-09-10 02:54:11.689493', 'step': 14971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:11.743148', 'step': 14971, 'epoch': 3} {'type': 'loss', 'content': 0.193232923746109, 'timestamp': '2025-09-10 02:54:11.748974', 'step': 14972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:11.801529', 'step': 14972, 'epoch': 3} {'type': 'loss', 'content': 0.07607080042362213, 'timestamp': '2025-09-10 02:54:11.803520', 'step': 14973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:11.857171', 'step': 14973, 'epoch': 3} {'type': 'loss', 'content': 0.07957493513822556, 'timestamp': '2025-09-10 02:54:11.859464', 'step': 14974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:11.914328', 'step': 14974, 'epoch': 3} {'type': 'loss', 'content': 0.1122317686676979, 'timestamp': '2025-09-10 02:54:11.916477', 'step': 14975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:11.970125', 'step': 14975, 'epoch': 3} {'type': 'loss', 'content': 0.06885414570569992, 'timestamp': '2025-09-10 02:54:11.975790', 'step': 14976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:12.029794', 'step': 14976, 'epoch': 3} {'type': 'loss', 'content': 0.08101309090852737, 'timestamp': '2025-09-10 02:54:12.031838', 'step': 14977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:12.085477', 'step': 14977, 'epoch': 3} {'type': 'loss', 'content': 0.1585632562637329, 'timestamp': '2025-09-10 02:54:12.087767', 'step': 14978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:12.142554', 'step': 14978, 'epoch': 3} {'type': 'loss', 'content': 0.14537812769412994, 'timestamp': '2025-09-10 02:54:12.144752', 'step': 14979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:12.198439', 'step': 14979, 'epoch': 3} {'type': 'loss', 'content': 0.11342156678438187, 'timestamp': '2025-09-10 02:54:12.204600', 'step': 14980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:12.263690', 'step': 14980, 'epoch': 3} {'type': 'loss', 'content': 0.11374108493328094, 'timestamp': '2025-09-10 02:54:12.265735', 'step': 14981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:12.318718', 'step': 14981, 'epoch': 3} {'type': 'loss', 'content': 0.07731219381093979, 'timestamp': '2025-09-10 02:54:12.320834', 'step': 14982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:12.375004', 'step': 14982, 'epoch': 3} {'type': 'loss', 'content': 0.22573575377464294, 'timestamp': '2025-09-10 02:54:12.377192', 'step': 14983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:12.430923', 'step': 14983, 'epoch': 3} {'type': 'loss', 'content': 0.07606605440378189, 'timestamp': '2025-09-10 02:54:12.436808', 'step': 14984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:12.490512', 'step': 14984, 'epoch': 3} {'type': 'loss', 'content': 0.04259825497865677, 'timestamp': '2025-09-10 02:54:12.492715', 'step': 14985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:12.546123', 'step': 14985, 'epoch': 3} {'type': 'loss', 'content': 0.10372360795736313, 'timestamp': '2025-09-10 02:54:12.548382', 'step': 14986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:12.602036', 'step': 14986, 'epoch': 3} {'type': 'loss', 'content': 0.04737606644630432, 'timestamp': '2025-09-10 02:54:12.604406', 'step': 14987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:12.659065', 'step': 14987, 'epoch': 3} {'type': 'loss', 'content': 0.11456981301307678, 'timestamp': '2025-09-10 02:54:12.664970', 'step': 14988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:12.718443', 'step': 14988, 'epoch': 3} {'type': 'loss', 'content': 0.15033745765686035, 'timestamp': '2025-09-10 02:54:12.720520', 'step': 14989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:12.774482', 'step': 14989, 'epoch': 3} {'type': 'loss', 'content': 0.1107981950044632, 'timestamp': '2025-09-10 02:54:12.776643', 'step': 14990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:12.832679', 'step': 14990, 'epoch': 3} {'type': 'loss', 'content': 0.10678746551275253, 'timestamp': '2025-09-10 02:54:12.834815', 'step': 14991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:12.888793', 'step': 14991, 'epoch': 3} {'type': 'loss', 'content': 0.08277128636837006, 'timestamp': '2025-09-10 02:54:12.894706', 'step': 14992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:12.948036', 'step': 14992, 'epoch': 3} {'type': 'loss', 'content': 0.12180854380130768, 'timestamp': '2025-09-10 02:54:12.950021', 'step': 14993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:13.004123', 'step': 14993, 'epoch': 3} {'type': 'loss', 'content': 0.07446244359016418, 'timestamp': '2025-09-10 02:54:13.006047', 'step': 14994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:13.060151', 'step': 14994, 'epoch': 3} {'type': 'loss', 'content': 0.12943623960018158, 'timestamp': '2025-09-10 02:54:13.062292', 'step': 14995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:13.116969', 'step': 14995, 'epoch': 3} {'type': 'loss', 'content': 0.12295683473348618, 'timestamp': '2025-09-10 02:54:13.122670', 'step': 14996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:13.175067', 'step': 14996, 'epoch': 3} {'type': 'loss', 'content': 0.07600974291563034, 'timestamp': '2025-09-10 02:54:13.176980', 'step': 14997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:13.230232', 'step': 14997, 'epoch': 3} {'type': 'loss', 'content': 0.07079586386680603, 'timestamp': '2025-09-10 02:54:13.232401', 'step': 14998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:13.286241', 'step': 14998, 'epoch': 3} {'type': 'loss', 'content': 0.08637639880180359, 'timestamp': '2025-09-10 02:54:13.288466', 'step': 14999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:13.342125', 'step': 14999, 'epoch': 3} {'type': 'loss', 'content': 0.11117376387119293, 'timestamp': '2025-09-10 02:54:13.348157', 'step': 15000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 15000', 'timestamp': '2025-09-10 02:54:13.710192', 'step': 15000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:13.767860', 'step': 15000, 'epoch': 3} {'type': 'loss', 'content': 0.09595939517021179, 'timestamp': '2025-09-10 02:54:13.769848', 'step': 15001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:13.825347', 'step': 15001, 'epoch': 3} {'type': 'loss', 'content': 0.128793865442276, 'timestamp': '2025-09-10 02:54:13.827603', 'step': 15002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:13.881206', 'step': 15002, 'epoch': 3} {'type': 'loss', 'content': 0.11868050694465637, 'timestamp': '2025-09-10 02:54:13.883116', 'step': 15003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:13.936858', 'step': 15003, 'epoch': 3} {'type': 'loss', 'content': 0.10957943648099899, 'timestamp': '2025-09-10 02:54:13.942945', 'step': 15004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:13.995964', 'step': 15004, 'epoch': 3} {'type': 'loss', 'content': 0.06501729041337967, 'timestamp': '2025-09-10 02:54:13.997876', 'step': 15005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:14.051349', 'step': 15005, 'epoch': 3} {'type': 'loss', 'content': 0.07175151258707047, 'timestamp': '2025-09-10 02:54:14.053607', 'step': 15006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:14.109265', 'step': 15006, 'epoch': 3} {'type': 'loss', 'content': 0.12214295566082001, 'timestamp': '2025-09-10 02:54:14.111475', 'step': 15007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:14.166062', 'step': 15007, 'epoch': 3} {'type': 'loss', 'content': 0.1422816812992096, 'timestamp': '2025-09-10 02:54:14.172073', 'step': 15008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:14.224808', 'step': 15008, 'epoch': 3} {'type': 'loss', 'content': 0.06785187870264053, 'timestamp': '2025-09-10 02:54:14.226928', 'step': 15009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:14.280388', 'step': 15009, 'epoch': 3} {'type': 'loss', 'content': 0.1609724760055542, 'timestamp': '2025-09-10 02:54:14.282827', 'step': 15010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:14.337048', 'step': 15010, 'epoch': 3} {'type': 'loss', 'content': 0.07314544916152954, 'timestamp': '2025-09-10 02:54:14.339016', 'step': 15011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:14.392183', 'step': 15011, 'epoch': 3} {'type': 'loss', 'content': 0.07182417064905167, 'timestamp': '2025-09-10 02:54:14.398302', 'step': 15012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:14.452361', 'step': 15012, 'epoch': 3} {'type': 'loss', 'content': 0.10986985266208649, 'timestamp': '2025-09-10 02:54:14.454352', 'step': 15013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:14.507619', 'step': 15013, 'epoch': 3} {'type': 'loss', 'content': 0.06565312296152115, 'timestamp': '2025-09-10 02:54:14.509736', 'step': 15014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:14.563331', 'step': 15014, 'epoch': 3} {'type': 'loss', 'content': 0.06918511539697647, 'timestamp': '2025-09-10 02:54:14.565497', 'step': 15015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:14.618689', 'step': 15015, 'epoch': 3} {'type': 'loss', 'content': 0.1244855672121048, 'timestamp': '2025-09-10 02:54:14.624492', 'step': 15016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:14.677223', 'step': 15016, 'epoch': 3} {'type': 'loss', 'content': 0.09934385120868683, 'timestamp': '2025-09-10 02:54:14.679182', 'step': 15017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:14.732701', 'step': 15017, 'epoch': 3} {'type': 'loss', 'content': 0.11732077598571777, 'timestamp': '2025-09-10 02:54:14.734599', 'step': 15018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:14.787850', 'step': 15018, 'epoch': 3} {'type': 'loss', 'content': 0.0842437818646431, 'timestamp': '2025-09-10 02:54:14.789936', 'step': 15019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:14.843458', 'step': 15019, 'epoch': 3} {'type': 'loss', 'content': 0.1110147014260292, 'timestamp': '2025-09-10 02:54:14.849365', 'step': 15020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:14.902491', 'step': 15020, 'epoch': 3} {'type': 'loss', 'content': 0.15920838713645935, 'timestamp': '2025-09-10 02:54:14.904432', 'step': 15021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:14.959436', 'step': 15021, 'epoch': 3} {'type': 'loss', 'content': 0.06266700476408005, 'timestamp': '2025-09-10 02:54:14.961427', 'step': 15022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:15.014399', 'step': 15022, 'epoch': 3} {'type': 'loss', 'content': 0.17115303874015808, 'timestamp': '2025-09-10 02:54:15.016503', 'step': 15023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:15.069852', 'step': 15023, 'epoch': 3} {'type': 'loss', 'content': 0.1405307799577713, 'timestamp': '2025-09-10 02:54:15.075539', 'step': 15024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:15.127942', 'step': 15024, 'epoch': 3} {'type': 'loss', 'content': 0.12165374308824539, 'timestamp': '2025-09-10 02:54:15.129991', 'step': 15025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:15.182940', 'step': 15025, 'epoch': 3} {'type': 'loss', 'content': 0.09552387148141861, 'timestamp': '2025-09-10 02:54:15.185576', 'step': 15026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:15.239067', 'step': 15026, 'epoch': 3} {'type': 'loss', 'content': 0.11225858330726624, 'timestamp': '2025-09-10 02:54:15.241145', 'step': 15027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:15.294332', 'step': 15027, 'epoch': 3} {'type': 'loss', 'content': 0.137289360165596, 'timestamp': '2025-09-10 02:54:15.299970', 'step': 15028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:15.353434', 'step': 15028, 'epoch': 3} {'type': 'loss', 'content': 0.07720000296831131, 'timestamp': '2025-09-10 02:54:15.355498', 'step': 15029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:15.411676', 'step': 15029, 'epoch': 3} {'type': 'loss', 'content': 0.13123440742492676, 'timestamp': '2025-09-10 02:54:15.413681', 'step': 15030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:15.468883', 'step': 15030, 'epoch': 3} {'type': 'loss', 'content': 0.07512735575437546, 'timestamp': '2025-09-10 02:54:15.470846', 'step': 15031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:15.524892', 'step': 15031, 'epoch': 3} {'type': 'loss', 'content': 0.08181316405534744, 'timestamp': '2025-09-10 02:54:15.530728', 'step': 15032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:15.584171', 'step': 15032, 'epoch': 3} {'type': 'loss', 'content': 0.095271535217762, 'timestamp': '2025-09-10 02:54:15.586084', 'step': 15033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:15.638942', 'step': 15033, 'epoch': 3} {'type': 'loss', 'content': 0.21454700827598572, 'timestamp': '2025-09-10 02:54:15.640994', 'step': 15034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:15.694265', 'step': 15034, 'epoch': 3} {'type': 'loss', 'content': 0.0708664208650589, 'timestamp': '2025-09-10 02:54:15.696609', 'step': 15035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:15.750314', 'step': 15035, 'epoch': 3} {'type': 'loss', 'content': 0.18344908952713013, 'timestamp': '2025-09-10 02:54:15.756179', 'step': 15036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:15.809121', 'step': 15036, 'epoch': 3} {'type': 'loss', 'content': 0.11798471212387085, 'timestamp': '2025-09-10 02:54:15.811063', 'step': 15037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:15.865587', 'step': 15037, 'epoch': 3} {'type': 'loss', 'content': 0.13136152923107147, 'timestamp': '2025-09-10 02:54:15.867615', 'step': 15038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:15.921814', 'step': 15038, 'epoch': 3} {'type': 'loss', 'content': 0.0497424453496933, 'timestamp': '2025-09-10 02:54:15.923935', 'step': 15039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:15.977440', 'step': 15039, 'epoch': 3} {'type': 'loss', 'content': 0.19441087543964386, 'timestamp': '2025-09-10 02:54:15.983326', 'step': 15040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:16.036360', 'step': 15040, 'epoch': 3} {'type': 'loss', 'content': 0.16214604675769806, 'timestamp': '2025-09-10 02:54:16.038467', 'step': 15041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:16.091606', 'step': 15041, 'epoch': 3} {'type': 'loss', 'content': 0.10520590096712112, 'timestamp': '2025-09-10 02:54:16.093828', 'step': 15042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:16.146968', 'step': 15042, 'epoch': 3} {'type': 'loss', 'content': 0.20143690705299377, 'timestamp': '2025-09-10 02:54:16.148978', 'step': 15043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:16.202435', 'step': 15043, 'epoch': 3} {'type': 'loss', 'content': 0.10468419641256332, 'timestamp': '2025-09-10 02:54:16.208283', 'step': 15044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:16.260683', 'step': 15044, 'epoch': 3} {'type': 'loss', 'content': 0.02778121642768383, 'timestamp': '2025-09-10 02:54:16.262876', 'step': 15045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:16.316621', 'step': 15045, 'epoch': 3} {'type': 'loss', 'content': 0.05299922823905945, 'timestamp': '2025-09-10 02:54:16.318845', 'step': 15046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:16.372274', 'step': 15046, 'epoch': 3} {'type': 'loss', 'content': 0.11474870145320892, 'timestamp': '2025-09-10 02:54:16.374430', 'step': 15047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:16.427920', 'step': 15047, 'epoch': 3} {'type': 'loss', 'content': 0.2566433250904083, 'timestamp': '2025-09-10 02:54:16.433774', 'step': 15048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:16.487351', 'step': 15048, 'epoch': 3} {'type': 'loss', 'content': 0.15400178730487823, 'timestamp': '2025-09-10 02:54:16.489747', 'step': 15049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:16.544281', 'step': 15049, 'epoch': 3} {'type': 'loss', 'content': 0.12834535539150238, 'timestamp': '2025-09-10 02:54:16.546696', 'step': 15050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:16.600343', 'step': 15050, 'epoch': 3} {'type': 'loss', 'content': 0.08839064091444016, 'timestamp': '2025-09-10 02:54:16.602574', 'step': 15051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:16.655676', 'step': 15051, 'epoch': 3} {'type': 'loss', 'content': 0.10999307781457901, 'timestamp': '2025-09-10 02:54:16.661606', 'step': 15052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:16.714481', 'step': 15052, 'epoch': 3} {'type': 'loss', 'content': 0.180553138256073, 'timestamp': '2025-09-10 02:54:16.716724', 'step': 15053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:16.770059', 'step': 15053, 'epoch': 3} {'type': 'loss', 'content': 0.1697656810283661, 'timestamp': '2025-09-10 02:54:16.772173', 'step': 15054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:16.825884', 'step': 15054, 'epoch': 3} {'type': 'loss', 'content': 0.08600106090307236, 'timestamp': '2025-09-10 02:54:16.828034', 'step': 15055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:16.882278', 'step': 15055, 'epoch': 3} {'type': 'loss', 'content': 0.07924515008926392, 'timestamp': '2025-09-10 02:54:16.888089', 'step': 15056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:16.940649', 'step': 15056, 'epoch': 3} {'type': 'loss', 'content': 0.12540480494499207, 'timestamp': '2025-09-10 02:54:16.942839', 'step': 15057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:16.996417', 'step': 15057, 'epoch': 3} {'type': 'loss', 'content': 0.1153918206691742, 'timestamp': '2025-09-10 02:54:16.998561', 'step': 15058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:17.052365', 'step': 15058, 'epoch': 3} {'type': 'loss', 'content': 0.10022749751806259, 'timestamp': '2025-09-10 02:54:17.054552', 'step': 15059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:17.108511', 'step': 15059, 'epoch': 3} {'type': 'loss', 'content': 0.14038509130477905, 'timestamp': '2025-09-10 02:54:17.114314', 'step': 15060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:17.167621', 'step': 15060, 'epoch': 3} {'type': 'loss', 'content': 0.1434721052646637, 'timestamp': '2025-09-10 02:54:17.169815', 'step': 15061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:17.223333', 'step': 15061, 'epoch': 3} {'type': 'loss', 'content': 0.09516386687755585, 'timestamp': '2025-09-10 02:54:17.225586', 'step': 15062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:17.279301', 'step': 15062, 'epoch': 3} {'type': 'loss', 'content': 0.14679312705993652, 'timestamp': '2025-09-10 02:54:17.281654', 'step': 15063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:17.334905', 'step': 15063, 'epoch': 3} {'type': 'loss', 'content': 0.14209970831871033, 'timestamp': '2025-09-10 02:54:17.340782', 'step': 15064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:17.394169', 'step': 15064, 'epoch': 3} {'type': 'loss', 'content': 0.11104222387075424, 'timestamp': '2025-09-10 02:54:17.396671', 'step': 15065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:17.468854', 'step': 15065, 'epoch': 3} {'type': 'loss', 'content': 0.16701926290988922, 'timestamp': '2025-09-10 02:54:17.471090', 'step': 15066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:17.524438', 'step': 15066, 'epoch': 3} {'type': 'loss', 'content': 0.11581531167030334, 'timestamp': '2025-09-10 02:54:17.526426', 'step': 15067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:17.583301', 'step': 15067, 'epoch': 3} {'type': 'loss', 'content': 0.11679641157388687, 'timestamp': '2025-09-10 02:54:17.589179', 'step': 15068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:17.642336', 'step': 15068, 'epoch': 3} {'type': 'loss', 'content': 0.06713785231113434, 'timestamp': '2025-09-10 02:54:17.644578', 'step': 15069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:17.698309', 'step': 15069, 'epoch': 3} {'type': 'loss', 'content': 0.1075558140873909, 'timestamp': '2025-09-10 02:54:17.700456', 'step': 15070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:17.754165', 'step': 15070, 'epoch': 3} {'type': 'loss', 'content': 0.08548857271671295, 'timestamp': '2025-09-10 02:54:17.756365', 'step': 15071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:17.809764', 'step': 15071, 'epoch': 3} {'type': 'loss', 'content': 0.056593120098114014, 'timestamp': '2025-09-10 02:54:17.815414', 'step': 15072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:17.871338', 'step': 15072, 'epoch': 3} {'type': 'loss', 'content': 0.10543836653232574, 'timestamp': '2025-09-10 02:54:17.873771', 'step': 15073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:17.935942', 'step': 15073, 'epoch': 3} {'type': 'loss', 'content': 0.1839049756526947, 'timestamp': '2025-09-10 02:54:17.938015', 'step': 15074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:17.991404', 'step': 15074, 'epoch': 3} {'type': 'loss', 'content': 0.2535835802555084, 'timestamp': '2025-09-10 02:54:17.993107', 'step': 15075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:18.046790', 'step': 15075, 'epoch': 3} {'type': 'loss', 'content': 0.1489015817642212, 'timestamp': '2025-09-10 02:54:18.052488', 'step': 15076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:18.107611', 'step': 15076, 'epoch': 3} {'type': 'loss', 'content': 0.033402349799871445, 'timestamp': '2025-09-10 02:54:18.109935', 'step': 15077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:18.164514', 'step': 15077, 'epoch': 3} {'type': 'loss', 'content': 0.13598285615444183, 'timestamp': '2025-09-10 02:54:18.168576', 'step': 15078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:18.222123', 'step': 15078, 'epoch': 3} {'type': 'loss', 'content': 0.0945085883140564, 'timestamp': '2025-09-10 02:54:18.224963', 'step': 15079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:18.278730', 'step': 15079, 'epoch': 3} {'type': 'loss', 'content': 0.08736269921064377, 'timestamp': '2025-09-10 02:54:18.284808', 'step': 15080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:18.338858', 'step': 15080, 'epoch': 3} {'type': 'loss', 'content': 0.13949669897556305, 'timestamp': '2025-09-10 02:54:18.341413', 'step': 15081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:18.394721', 'step': 15081, 'epoch': 3} {'type': 'loss', 'content': 0.1520230621099472, 'timestamp': '2025-09-10 02:54:18.399871', 'step': 15082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:18.453531', 'step': 15082, 'epoch': 3} {'type': 'loss', 'content': 0.08934298157691956, 'timestamp': '2025-09-10 02:54:18.455867', 'step': 15083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:18.510549', 'step': 15083, 'epoch': 3} {'type': 'loss', 'content': 0.09934787452220917, 'timestamp': '2025-09-10 02:54:18.516315', 'step': 15084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:18.570543', 'step': 15084, 'epoch': 3} {'type': 'loss', 'content': 0.12707409262657166, 'timestamp': '2025-09-10 02:54:18.572654', 'step': 15085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:18.625958', 'step': 15085, 'epoch': 3} {'type': 'loss', 'content': 0.10191960632801056, 'timestamp': '2025-09-10 02:54:18.631312', 'step': 15086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:18.690210', 'step': 15086, 'epoch': 3} {'type': 'loss', 'content': 0.06694940477609634, 'timestamp': '2025-09-10 02:54:18.692442', 'step': 15087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:18.748034', 'step': 15087, 'epoch': 3} {'type': 'loss', 'content': 0.07438038289546967, 'timestamp': '2025-09-10 02:54:18.754769', 'step': 15088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:18.809585', 'step': 15088, 'epoch': 3} {'type': 'loss', 'content': 0.06250672042369843, 'timestamp': '2025-09-10 02:54:18.812538', 'step': 15089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:18.868225', 'step': 15089, 'epoch': 3} {'type': 'loss', 'content': 0.10792969167232513, 'timestamp': '2025-09-10 02:54:18.870807', 'step': 15090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:18.925254', 'step': 15090, 'epoch': 3} {'type': 'loss', 'content': 0.07768245041370392, 'timestamp': '2025-09-10 02:54:18.927802', 'step': 15091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:18.984059', 'step': 15091, 'epoch': 3} {'type': 'loss', 'content': 0.16547514498233795, 'timestamp': '2025-09-10 02:54:18.990203', 'step': 15092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:19.050196', 'step': 15092, 'epoch': 3} {'type': 'loss', 'content': 0.17861610651016235, 'timestamp': '2025-09-10 02:54:19.052747', 'step': 15093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:19.110016', 'step': 15093, 'epoch': 3} {'type': 'loss', 'content': 0.1454687863588333, 'timestamp': '2025-09-10 02:54:19.113579', 'step': 15094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:19.172536', 'step': 15094, 'epoch': 3} {'type': 'loss', 'content': 0.1727215051651001, 'timestamp': '2025-09-10 02:54:19.176867', 'step': 15095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:19.233632', 'step': 15095, 'epoch': 3} {'type': 'loss', 'content': 0.19529597461223602, 'timestamp': '2025-09-10 02:54:19.250007', 'step': 15096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:19.306881', 'step': 15096, 'epoch': 3} {'type': 'loss', 'content': 0.157749205827713, 'timestamp': '2025-09-10 02:54:19.311077', 'step': 15097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:19.370529', 'step': 15097, 'epoch': 3} {'type': 'loss', 'content': 0.031096920371055603, 'timestamp': '2025-09-10 02:54:19.373555', 'step': 15098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:19.432030', 'step': 15098, 'epoch': 3} {'type': 'loss', 'content': 0.17559143900871277, 'timestamp': '2025-09-10 02:54:19.434557', 'step': 15099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:19.488695', 'step': 15099, 'epoch': 3} {'type': 'loss', 'content': 0.08553950488567352, 'timestamp': '2025-09-10 02:54:19.494670', 'step': 15100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:19.548397', 'step': 15100, 'epoch': 3} {'type': 'loss', 'content': 0.13845643401145935, 'timestamp': '2025-09-10 02:54:19.550871', 'step': 15101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:19.610408', 'step': 15101, 'epoch': 3} {'type': 'loss', 'content': 0.06401839107275009, 'timestamp': '2025-09-10 02:54:19.612965', 'step': 15102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:19.666845', 'step': 15102, 'epoch': 3} {'type': 'loss', 'content': 0.16962595283985138, 'timestamp': '2025-09-10 02:54:19.676636', 'step': 15103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:19.733418', 'step': 15103, 'epoch': 3} {'type': 'loss', 'content': 0.07638683915138245, 'timestamp': '2025-09-10 02:54:19.745544', 'step': 15104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:19.803459', 'step': 15104, 'epoch': 3} {'type': 'loss', 'content': 0.1007426306605339, 'timestamp': '2025-09-10 02:54:19.805717', 'step': 15105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:19.860542', 'step': 15105, 'epoch': 3} {'type': 'loss', 'content': 0.04893675446510315, 'timestamp': '2025-09-10 02:54:19.862916', 'step': 15106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:19.918362', 'step': 15106, 'epoch': 3} {'type': 'loss', 'content': 0.12072593718767166, 'timestamp': '2025-09-10 02:54:19.920690', 'step': 15107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:19.974555', 'step': 15107, 'epoch': 3} {'type': 'loss', 'content': 0.12461504340171814, 'timestamp': '2025-09-10 02:54:19.980540', 'step': 15108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:20.034449', 'step': 15108, 'epoch': 3} {'type': 'loss', 'content': 0.14544478058815002, 'timestamp': '2025-09-10 02:54:20.036187', 'step': 15109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:20.089195', 'step': 15109, 'epoch': 3} {'type': 'loss', 'content': 0.08756354451179504, 'timestamp': '2025-09-10 02:54:20.091447', 'step': 15110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:20.145891', 'step': 15110, 'epoch': 3} {'type': 'loss', 'content': 0.043330151587724686, 'timestamp': '2025-09-10 02:54:20.148015', 'step': 15111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:20.204081', 'step': 15111, 'epoch': 3} {'type': 'loss', 'content': 0.11745169013738632, 'timestamp': '2025-09-10 02:54:20.210563', 'step': 15112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:20.265914', 'step': 15112, 'epoch': 3} {'type': 'loss', 'content': 0.13315890729427338, 'timestamp': '2025-09-10 02:54:20.268069', 'step': 15113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:20.327954', 'step': 15113, 'epoch': 3} {'type': 'loss', 'content': 0.12950557470321655, 'timestamp': '2025-09-10 02:54:20.330110', 'step': 15114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:20.385280', 'step': 15114, 'epoch': 3} {'type': 'loss', 'content': 0.11972791701555252, 'timestamp': '2025-09-10 02:54:20.387434', 'step': 15115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:20.441462', 'step': 15115, 'epoch': 3} {'type': 'loss', 'content': 0.09573344886302948, 'timestamp': '2025-09-10 02:54:20.447546', 'step': 15116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:20.501104', 'step': 15116, 'epoch': 3} {'type': 'loss', 'content': 0.13728271424770355, 'timestamp': '2025-09-10 02:54:20.503187', 'step': 15117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:20.557052', 'step': 15117, 'epoch': 3} {'type': 'loss', 'content': 0.11689970642328262, 'timestamp': '2025-09-10 02:54:20.559195', 'step': 15118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:20.613317', 'step': 15118, 'epoch': 3} {'type': 'loss', 'content': 0.061808302998542786, 'timestamp': '2025-09-10 02:54:20.615528', 'step': 15119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:20.669142', 'step': 15119, 'epoch': 3} {'type': 'loss', 'content': 0.115445077419281, 'timestamp': '2025-09-10 02:54:20.675355', 'step': 15120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:20.730210', 'step': 15120, 'epoch': 3} {'type': 'loss', 'content': 0.18951573967933655, 'timestamp': '2025-09-10 02:54:20.732518', 'step': 15121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:20.787055', 'step': 15121, 'epoch': 3} {'type': 'loss', 'content': 0.059081077575683594, 'timestamp': '2025-09-10 02:54:20.789455', 'step': 15122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:20.844819', 'step': 15122, 'epoch': 3} {'type': 'loss', 'content': 0.1057664081454277, 'timestamp': '2025-09-10 02:54:20.848818', 'step': 15123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:20.908717', 'step': 15123, 'epoch': 3} {'type': 'loss', 'content': 0.11263395100831985, 'timestamp': '2025-09-10 02:54:20.914830', 'step': 15124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:20.968411', 'step': 15124, 'epoch': 3} {'type': 'loss', 'content': 0.14965102076530457, 'timestamp': '2025-09-10 02:54:20.970600', 'step': 15125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:21.024070', 'step': 15125, 'epoch': 3} {'type': 'loss', 'content': 0.08843620866537094, 'timestamp': '2025-09-10 02:54:21.029492', 'step': 15126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:21.089460', 'step': 15126, 'epoch': 3} {'type': 'loss', 'content': 0.13567788898944855, 'timestamp': '2025-09-10 02:54:21.091681', 'step': 15127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:21.145508', 'step': 15127, 'epoch': 3} {'type': 'loss', 'content': 0.14417877793312073, 'timestamp': '2025-09-10 02:54:21.153556', 'step': 15128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:21.209707', 'step': 15128, 'epoch': 3} {'type': 'loss', 'content': 0.10973574966192245, 'timestamp': '2025-09-10 02:54:21.211621', 'step': 15129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:21.264841', 'step': 15129, 'epoch': 3} {'type': 'loss', 'content': 0.2576344311237335, 'timestamp': '2025-09-10 02:54:21.267579', 'step': 15130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:21.324386', 'step': 15130, 'epoch': 3} {'type': 'loss', 'content': 0.08958924561738968, 'timestamp': '2025-09-10 02:54:21.326740', 'step': 15131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:21.382248', 'step': 15131, 'epoch': 3} {'type': 'loss', 'content': 0.16072137653827667, 'timestamp': '2025-09-10 02:54:21.388642', 'step': 15132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:21.443689', 'step': 15132, 'epoch': 3} {'type': 'loss', 'content': 0.13162222504615784, 'timestamp': '2025-09-10 02:54:21.445852', 'step': 15133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:21.501357', 'step': 15133, 'epoch': 3} {'type': 'loss', 'content': 0.18612854182720184, 'timestamp': '2025-09-10 02:54:21.503493', 'step': 15134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:21.558543', 'step': 15134, 'epoch': 3} {'type': 'loss', 'content': 0.09041079133749008, 'timestamp': '2025-09-10 02:54:21.560958', 'step': 15135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:21.614942', 'step': 15135, 'epoch': 3} {'type': 'loss', 'content': 0.14440470933914185, 'timestamp': '2025-09-10 02:54:21.621252', 'step': 15136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:21.674756', 'step': 15136, 'epoch': 3} {'type': 'loss', 'content': 0.10666992515325546, 'timestamp': '2025-09-10 02:54:21.676994', 'step': 15137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:21.730145', 'step': 15137, 'epoch': 3} {'type': 'loss', 'content': 0.12738966941833496, 'timestamp': '2025-09-10 02:54:21.732447', 'step': 15138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:21.792560', 'step': 15138, 'epoch': 3} {'type': 'loss', 'content': 0.14666131138801575, 'timestamp': '2025-09-10 02:54:21.794770', 'step': 15139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:21.849603', 'step': 15139, 'epoch': 3} {'type': 'loss', 'content': 0.12689785659313202, 'timestamp': '2025-09-10 02:54:21.855901', 'step': 15140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:21.909841', 'step': 15140, 'epoch': 3} {'type': 'loss', 'content': 0.1444319635629654, 'timestamp': '2025-09-10 02:54:21.912010', 'step': 15141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:21.966766', 'step': 15141, 'epoch': 3} {'type': 'loss', 'content': 0.08733637630939484, 'timestamp': '2025-09-10 02:54:21.968940', 'step': 15142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:22.022984', 'step': 15142, 'epoch': 3} {'type': 'loss', 'content': 0.1639781892299652, 'timestamp': '2025-09-10 02:54:22.026567', 'step': 15143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:22.084113', 'step': 15143, 'epoch': 3} {'type': 'loss', 'content': 0.04757057502865791, 'timestamp': '2025-09-10 02:54:22.090329', 'step': 15144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:22.144685', 'step': 15144, 'epoch': 3} {'type': 'loss', 'content': 0.06696435809135437, 'timestamp': '2025-09-10 02:54:22.146791', 'step': 15145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:22.201321', 'step': 15145, 'epoch': 3} {'type': 'loss', 'content': 0.11678295582532883, 'timestamp': '2025-09-10 02:54:22.203541', 'step': 15146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:22.257962', 'step': 15146, 'epoch': 3} {'type': 'loss', 'content': 0.16163471341133118, 'timestamp': '2025-09-10 02:54:22.260143', 'step': 15147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:22.314901', 'step': 15147, 'epoch': 3} {'type': 'loss', 'content': 0.09995856136083603, 'timestamp': '2025-09-10 02:54:22.320999', 'step': 15148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:22.375908', 'step': 15148, 'epoch': 3} {'type': 'loss', 'content': 0.19036328792572021, 'timestamp': '2025-09-10 02:54:22.378402', 'step': 15149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:22.433570', 'step': 15149, 'epoch': 3} {'type': 'loss', 'content': 0.043314628303050995, 'timestamp': '2025-09-10 02:54:22.435892', 'step': 15150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:22.490527', 'step': 15150, 'epoch': 3} {'type': 'loss', 'content': 0.06803114712238312, 'timestamp': '2025-09-10 02:54:22.493017', 'step': 15151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:22.547974', 'step': 15151, 'epoch': 3} {'type': 'loss', 'content': 0.1332303285598755, 'timestamp': '2025-09-10 02:54:22.554420', 'step': 15152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:22.608372', 'step': 15152, 'epoch': 3} {'type': 'loss', 'content': 0.10556258261203766, 'timestamp': '2025-09-10 02:54:22.610655', 'step': 15153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:22.664973', 'step': 15153, 'epoch': 3} {'type': 'loss', 'content': 0.10162699967622757, 'timestamp': '2025-09-10 02:54:22.667170', 'step': 15154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:22.721425', 'step': 15154, 'epoch': 3} {'type': 'loss', 'content': 0.16331352293491364, 'timestamp': '2025-09-10 02:54:22.723624', 'step': 15155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:22.777698', 'step': 15155, 'epoch': 3} {'type': 'loss', 'content': 0.10762470215559006, 'timestamp': '2025-09-10 02:54:22.783851', 'step': 15156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:22.837075', 'step': 15156, 'epoch': 3} {'type': 'loss', 'content': 0.13426831364631653, 'timestamp': '2025-09-10 02:54:22.839325', 'step': 15157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:22.893826', 'step': 15157, 'epoch': 3} {'type': 'loss', 'content': 0.14048370718955994, 'timestamp': '2025-09-10 02:54:22.896102', 'step': 15158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:22.950472', 'step': 15158, 'epoch': 3} {'type': 'loss', 'content': 0.1371818333864212, 'timestamp': '2025-09-10 02:54:22.952645', 'step': 15159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:23.011378', 'step': 15159, 'epoch': 3} {'type': 'loss', 'content': 0.04060467705130577, 'timestamp': '2025-09-10 02:54:23.017385', 'step': 15160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:23.070632', 'step': 15160, 'epoch': 3} {'type': 'loss', 'content': 0.1553112268447876, 'timestamp': '2025-09-10 02:54:23.072815', 'step': 15161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:23.128227', 'step': 15161, 'epoch': 3} {'type': 'loss', 'content': 0.04407636821269989, 'timestamp': '2025-09-10 02:54:23.130458', 'step': 15162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:23.185251', 'step': 15162, 'epoch': 3} {'type': 'loss', 'content': 0.24781487882137299, 'timestamp': '2025-09-10 02:54:23.187373', 'step': 15163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:23.241140', 'step': 15163, 'epoch': 3} {'type': 'loss', 'content': 0.14921368658542633, 'timestamp': '2025-09-10 02:54:23.246874', 'step': 15164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:23.300165', 'step': 15164, 'epoch': 3} {'type': 'loss', 'content': 0.18980316817760468, 'timestamp': '2025-09-10 02:54:23.302472', 'step': 15165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:23.356699', 'step': 15165, 'epoch': 3} {'type': 'loss', 'content': 0.18627645075321198, 'timestamp': '2025-09-10 02:54:23.358964', 'step': 15166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:23.419534', 'step': 15166, 'epoch': 3} {'type': 'loss', 'content': 0.04333266615867615, 'timestamp': '2025-09-10 02:54:23.421891', 'step': 15167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:23.477114', 'step': 15167, 'epoch': 3} {'type': 'loss', 'content': 0.07901587337255478, 'timestamp': '2025-09-10 02:54:23.483466', 'step': 15168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:23.536809', 'step': 15168, 'epoch': 3} {'type': 'loss', 'content': 0.10588739067316055, 'timestamp': '2025-09-10 02:54:23.538759', 'step': 15169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:23.592143', 'step': 15169, 'epoch': 3} {'type': 'loss', 'content': 0.09983669221401215, 'timestamp': '2025-09-10 02:54:23.594525', 'step': 15170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:23.648316', 'step': 15170, 'epoch': 3} {'type': 'loss', 'content': 0.08505764603614807, 'timestamp': '2025-09-10 02:54:23.650485', 'step': 15171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:23.708762', 'step': 15171, 'epoch': 3} {'type': 'loss', 'content': 0.11810587346553802, 'timestamp': '2025-09-10 02:54:23.714574', 'step': 15172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:23.773695', 'step': 15172, 'epoch': 3} {'type': 'loss', 'content': 0.1697625368833542, 'timestamp': '2025-09-10 02:54:23.776003', 'step': 15173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:23.835917', 'step': 15173, 'epoch': 3} {'type': 'loss', 'content': 0.0530097670853138, 'timestamp': '2025-09-10 02:54:23.838079', 'step': 15174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:23.891713', 'step': 15174, 'epoch': 3} {'type': 'loss', 'content': 0.08807143568992615, 'timestamp': '2025-09-10 02:54:23.893937', 'step': 15175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:23.947273', 'step': 15175, 'epoch': 3} {'type': 'loss', 'content': 0.043319568037986755, 'timestamp': '2025-09-10 02:54:23.953193', 'step': 15176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:24.006970', 'step': 15176, 'epoch': 3} {'type': 'loss', 'content': 0.1306157410144806, 'timestamp': '2025-09-10 02:54:24.009068', 'step': 15177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:24.063501', 'step': 15177, 'epoch': 3} {'type': 'loss', 'content': 0.09522800147533417, 'timestamp': '2025-09-10 02:54:24.065817', 'step': 15178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:24.120113', 'step': 15178, 'epoch': 3} {'type': 'loss', 'content': 0.09501346200704575, 'timestamp': '2025-09-10 02:54:24.122434', 'step': 15179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:24.177621', 'step': 15179, 'epoch': 3} {'type': 'loss', 'content': 0.19198215007781982, 'timestamp': '2025-09-10 02:54:24.183737', 'step': 15180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:24.237278', 'step': 15180, 'epoch': 3} {'type': 'loss', 'content': 0.12083762139081955, 'timestamp': '2025-09-10 02:54:24.239401', 'step': 15181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:24.295481', 'step': 15181, 'epoch': 3} {'type': 'loss', 'content': 0.06423326581716537, 'timestamp': '2025-09-10 02:54:24.297473', 'step': 15182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:24.352239', 'step': 15182, 'epoch': 3} {'type': 'loss', 'content': 0.09891505539417267, 'timestamp': '2025-09-10 02:54:24.354496', 'step': 15183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:24.410584', 'step': 15183, 'epoch': 3} {'type': 'loss', 'content': 0.08675345778465271, 'timestamp': '2025-09-10 02:54:24.416715', 'step': 15184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:24.470897', 'step': 15184, 'epoch': 3} {'type': 'loss', 'content': 0.10966213047504425, 'timestamp': '2025-09-10 02:54:24.473396', 'step': 15185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:24.528810', 'step': 15185, 'epoch': 3} {'type': 'loss', 'content': 0.08400639146566391, 'timestamp': '2025-09-10 02:54:24.531090', 'step': 15186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:24.586689', 'step': 15186, 'epoch': 3} {'type': 'loss', 'content': 0.06128187105059624, 'timestamp': '2025-09-10 02:54:24.588733', 'step': 15187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:24.642546', 'step': 15187, 'epoch': 3} {'type': 'loss', 'content': 0.0931919738650322, 'timestamp': '2025-09-10 02:54:24.648686', 'step': 15188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:24.702499', 'step': 15188, 'epoch': 3} {'type': 'loss', 'content': 0.09092503041028976, 'timestamp': '2025-09-10 02:54:24.704502', 'step': 15189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:24.758662', 'step': 15189, 'epoch': 3} {'type': 'loss', 'content': 0.054562076926231384, 'timestamp': '2025-09-10 02:54:24.760914', 'step': 15190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:24.815167', 'step': 15190, 'epoch': 3} {'type': 'loss', 'content': 0.17385464906692505, 'timestamp': '2025-09-10 02:54:24.817195', 'step': 15191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:24.871071', 'step': 15191, 'epoch': 3} {'type': 'loss', 'content': 0.13789808750152588, 'timestamp': '2025-09-10 02:54:24.877277', 'step': 15192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:24.930714', 'step': 15192, 'epoch': 3} {'type': 'loss', 'content': 0.04549407958984375, 'timestamp': '2025-09-10 02:54:24.932954', 'step': 15193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:24.991762', 'step': 15193, 'epoch': 3} {'type': 'loss', 'content': 0.07620254904031754, 'timestamp': '2025-09-10 02:54:24.994110', 'step': 15194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:25.050276', 'step': 15194, 'epoch': 3} {'type': 'loss', 'content': 0.09602741152048111, 'timestamp': '2025-09-10 02:54:25.052462', 'step': 15195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:25.108542', 'step': 15195, 'epoch': 3} {'type': 'loss', 'content': 0.07117927819490433, 'timestamp': '2025-09-10 02:54:25.114889', 'step': 15196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:25.170145', 'step': 15196, 'epoch': 3} {'type': 'loss', 'content': 0.11676332354545593, 'timestamp': '2025-09-10 02:54:25.172403', 'step': 15197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:25.226981', 'step': 15197, 'epoch': 3} {'type': 'loss', 'content': 0.1172533929347992, 'timestamp': '2025-09-10 02:54:25.229229', 'step': 15198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:25.286134', 'step': 15198, 'epoch': 3} {'type': 'loss', 'content': 0.09174950420856476, 'timestamp': '2025-09-10 02:54:25.288368', 'step': 15199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:25.342995', 'step': 15199, 'epoch': 3} {'type': 'loss', 'content': 0.14813290536403656, 'timestamp': '2025-09-10 02:54:25.349179', 'step': 15200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:25.408136', 'step': 15200, 'epoch': 3} {'type': 'loss', 'content': 0.022449664771556854, 'timestamp': '2025-09-10 02:54:25.410400', 'step': 15201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:25.471333', 'step': 15201, 'epoch': 3} {'type': 'loss', 'content': 0.09661313146352768, 'timestamp': '2025-09-10 02:54:25.473535', 'step': 15202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:25.533873', 'step': 15202, 'epoch': 3} {'type': 'loss', 'content': 0.0763992965221405, 'timestamp': '2025-09-10 02:54:25.536445', 'step': 15203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:25.599355', 'step': 15203, 'epoch': 3} {'type': 'loss', 'content': 0.04700002819299698, 'timestamp': '2025-09-10 02:54:25.605710', 'step': 15204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:25.669620', 'step': 15204, 'epoch': 3} {'type': 'loss', 'content': 0.10542106628417969, 'timestamp': '2025-09-10 02:54:25.671750', 'step': 15205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:25.744632', 'step': 15205, 'epoch': 3} {'type': 'loss', 'content': 0.21200361847877502, 'timestamp': '2025-09-10 02:54:25.746815', 'step': 15206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:25.800904', 'step': 15206, 'epoch': 3} {'type': 'loss', 'content': 0.21322959661483765, 'timestamp': '2025-09-10 02:54:25.803023', 'step': 15207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:25.856400', 'step': 15207, 'epoch': 3} {'type': 'loss', 'content': 0.22517183423042297, 'timestamp': '2025-09-10 02:54:25.862641', 'step': 15208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:25.915671', 'step': 15208, 'epoch': 3} {'type': 'loss', 'content': 0.18535581231117249, 'timestamp': '2025-09-10 02:54:25.917608', 'step': 15209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:25.970975', 'step': 15209, 'epoch': 3} {'type': 'loss', 'content': 0.11240334808826447, 'timestamp': '2025-09-10 02:54:25.972829', 'step': 15210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:26.026455', 'step': 15210, 'epoch': 3} {'type': 'loss', 'content': 0.0809623971581459, 'timestamp': '2025-09-10 02:54:26.028416', 'step': 15211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:26.081696', 'step': 15211, 'epoch': 3} {'type': 'loss', 'content': 0.19639833271503448, 'timestamp': '2025-09-10 02:54:26.087735', 'step': 15212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:26.141000', 'step': 15212, 'epoch': 3} {'type': 'loss', 'content': 0.17990951240062714, 'timestamp': '2025-09-10 02:54:26.143156', 'step': 15213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:26.198261', 'step': 15213, 'epoch': 3} {'type': 'loss', 'content': 0.13554313778877258, 'timestamp': '2025-09-10 02:54:26.200400', 'step': 15214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:26.254648', 'step': 15214, 'epoch': 3} {'type': 'loss', 'content': 0.12771308422088623, 'timestamp': '2025-09-10 02:54:26.256804', 'step': 15215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:26.311240', 'step': 15215, 'epoch': 3} {'type': 'loss', 'content': 0.05898023024201393, 'timestamp': '2025-09-10 02:54:26.317187', 'step': 15216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:26.370769', 'step': 15216, 'epoch': 3} {'type': 'loss', 'content': 0.11561095714569092, 'timestamp': '2025-09-10 02:54:26.372885', 'step': 15217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:26.426671', 'step': 15217, 'epoch': 3} {'type': 'loss', 'content': 0.10357553511857986, 'timestamp': '2025-09-10 02:54:26.428844', 'step': 15218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:26.483595', 'step': 15218, 'epoch': 3} {'type': 'loss', 'content': 0.11081061512231827, 'timestamp': '2025-09-10 02:54:26.485625', 'step': 15219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:26.542627', 'step': 15219, 'epoch': 3} {'type': 'loss', 'content': 0.1280529946088791, 'timestamp': '2025-09-10 02:54:26.548600', 'step': 15220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:26.603841', 'step': 15220, 'epoch': 3} {'type': 'loss', 'content': 0.121108278632164, 'timestamp': '2025-09-10 02:54:26.605935', 'step': 15221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:26.659977', 'step': 15221, 'epoch': 3} {'type': 'loss', 'content': 0.1370084285736084, 'timestamp': '2025-09-10 02:54:26.662315', 'step': 15222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:26.716597', 'step': 15222, 'epoch': 3} {'type': 'loss', 'content': 0.08431452512741089, 'timestamp': '2025-09-10 02:54:26.718627', 'step': 15223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:26.772460', 'step': 15223, 'epoch': 3} {'type': 'loss', 'content': 0.15149182081222534, 'timestamp': '2025-09-10 02:54:26.778370', 'step': 15224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:54:26.847661', 'step': 15224, 'epoch': 3} {'type': 'loss', 'content': 0.07573597878217697, 'timestamp': '2025-09-10 02:54:26.861275', 'step': 15225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:26.917291', 'step': 15225, 'epoch': 3} {'type': 'loss', 'content': 0.1711936593055725, 'timestamp': '2025-09-10 02:54:26.919385', 'step': 15226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:26.972846', 'step': 15226, 'epoch': 3} {'type': 'loss', 'content': 0.12214279174804688, 'timestamp': '2025-09-10 02:54:26.975069', 'step': 15227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:27.028434', 'step': 15227, 'epoch': 3} {'type': 'loss', 'content': 0.07722042500972748, 'timestamp': '2025-09-10 02:54:27.034577', 'step': 15228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:27.087765', 'step': 15228, 'epoch': 3} {'type': 'loss', 'content': 0.1252635270357132, 'timestamp': '2025-09-10 02:54:27.089748', 'step': 15229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:27.143750', 'step': 15229, 'epoch': 3} {'type': 'loss', 'content': 0.1261649876832962, 'timestamp': '2025-09-10 02:54:27.145706', 'step': 15230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:27.200642', 'step': 15230, 'epoch': 3} {'type': 'loss', 'content': 0.12713629007339478, 'timestamp': '2025-09-10 02:54:27.202771', 'step': 15231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:27.257029', 'step': 15231, 'epoch': 3} {'type': 'loss', 'content': 0.06913691014051437, 'timestamp': '2025-09-10 02:54:27.262777', 'step': 15232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:27.316419', 'step': 15232, 'epoch': 3} {'type': 'loss', 'content': 0.06307058036327362, 'timestamp': '2025-09-10 02:54:27.318322', 'step': 15233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:27.371727', 'step': 15233, 'epoch': 3} {'type': 'loss', 'content': 0.1302233338356018, 'timestamp': '2025-09-10 02:54:27.373782', 'step': 15234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:27.427543', 'step': 15234, 'epoch': 3} {'type': 'loss', 'content': 0.059747133404016495, 'timestamp': '2025-09-10 02:54:27.429723', 'step': 15235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:27.483397', 'step': 15235, 'epoch': 3} {'type': 'loss', 'content': 0.06560041010379791, 'timestamp': '2025-09-10 02:54:27.489482', 'step': 15236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:27.542997', 'step': 15236, 'epoch': 3} {'type': 'loss', 'content': 0.08494576066732407, 'timestamp': '2025-09-10 02:54:27.545304', 'step': 15237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:27.599068', 'step': 15237, 'epoch': 3} {'type': 'loss', 'content': 0.08876737207174301, 'timestamp': '2025-09-10 02:54:27.601081', 'step': 15238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:27.654590', 'step': 15238, 'epoch': 3} {'type': 'loss', 'content': 0.10641071945428848, 'timestamp': '2025-09-10 02:54:27.656632', 'step': 15239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:27.710682', 'step': 15239, 'epoch': 3} {'type': 'loss', 'content': 0.14246292412281036, 'timestamp': '2025-09-10 02:54:27.716900', 'step': 15240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:27.770654', 'step': 15240, 'epoch': 3} {'type': 'loss', 'content': 0.2262236773967743, 'timestamp': '2025-09-10 02:54:27.772779', 'step': 15241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:27.826239', 'step': 15241, 'epoch': 3} {'type': 'loss', 'content': 0.11664421111345291, 'timestamp': '2025-09-10 02:54:27.828485', 'step': 15242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:27.883309', 'step': 15242, 'epoch': 3} {'type': 'loss', 'content': 0.053191449493169785, 'timestamp': '2025-09-10 02:54:27.885545', 'step': 15243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:27.939544', 'step': 15243, 'epoch': 3} {'type': 'loss', 'content': 0.0636945366859436, 'timestamp': '2025-09-10 02:54:27.945556', 'step': 15244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:27.999475', 'step': 15244, 'epoch': 3} {'type': 'loss', 'content': 0.08571677654981613, 'timestamp': '2025-09-10 02:54:28.001709', 'step': 15245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:28.073757', 'step': 15245, 'epoch': 3} {'type': 'loss', 'content': 0.217405766248703, 'timestamp': '2025-09-10 02:54:28.076006', 'step': 15246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:28.131534', 'step': 15246, 'epoch': 3} {'type': 'loss', 'content': 0.06856638193130493, 'timestamp': '2025-09-10 02:54:28.133738', 'step': 15247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:28.187959', 'step': 15247, 'epoch': 3} {'type': 'loss', 'content': 0.12729105353355408, 'timestamp': '2025-09-10 02:54:28.194018', 'step': 15248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:28.247157', 'step': 15248, 'epoch': 3} {'type': 'loss', 'content': 0.09112492948770523, 'timestamp': '2025-09-10 02:54:28.253895', 'step': 15249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:28.307790', 'step': 15249, 'epoch': 3} {'type': 'loss', 'content': 0.06065414473414421, 'timestamp': '2025-09-10 02:54:28.310174', 'step': 15250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:28.364768', 'step': 15250, 'epoch': 3} {'type': 'loss', 'content': 0.1794264167547226, 'timestamp': '2025-09-10 02:54:28.366967', 'step': 15251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:28.422415', 'step': 15251, 'epoch': 3} {'type': 'loss', 'content': 0.12463727593421936, 'timestamp': '2025-09-10 02:54:28.430376', 'step': 15252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:28.484695', 'step': 15252, 'epoch': 3} {'type': 'loss', 'content': 0.055561818182468414, 'timestamp': '2025-09-10 02:54:28.486875', 'step': 15253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:28.541486', 'step': 15253, 'epoch': 3} {'type': 'loss', 'content': 0.15537168085575104, 'timestamp': '2025-09-10 02:54:28.543702', 'step': 15254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:28.608132', 'step': 15254, 'epoch': 3} {'type': 'loss', 'content': 0.060204554349184036, 'timestamp': '2025-09-10 02:54:28.611299', 'step': 15255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:28.666016', 'step': 15255, 'epoch': 3} {'type': 'loss', 'content': 0.09336967766284943, 'timestamp': '2025-09-10 02:54:28.675287', 'step': 15256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:28.729623', 'step': 15256, 'epoch': 3} {'type': 'loss', 'content': 0.14312925934791565, 'timestamp': '2025-09-10 02:54:28.732759', 'step': 15257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:28.793823', 'step': 15257, 'epoch': 3} {'type': 'loss', 'content': 0.0691143274307251, 'timestamp': '2025-09-10 02:54:28.796028', 'step': 15258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:28.851720', 'step': 15258, 'epoch': 3} {'type': 'loss', 'content': 0.089325912296772, 'timestamp': '2025-09-10 02:54:28.853832', 'step': 15259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:28.909127', 'step': 15259, 'epoch': 3} {'type': 'loss', 'content': 0.11185574531555176, 'timestamp': '2025-09-10 02:54:28.915509', 'step': 15260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:28.971536', 'step': 15260, 'epoch': 3} {'type': 'loss', 'content': 0.09868782013654709, 'timestamp': '2025-09-10 02:54:28.973606', 'step': 15261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:29.027793', 'step': 15261, 'epoch': 3} {'type': 'loss', 'content': 0.12010648101568222, 'timestamp': '2025-09-10 02:54:29.030666', 'step': 15262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:29.089638', 'step': 15262, 'epoch': 3} {'type': 'loss', 'content': 0.10802248865365982, 'timestamp': '2025-09-10 02:54:29.094824', 'step': 15263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:29.149764', 'step': 15263, 'epoch': 3} {'type': 'loss', 'content': 0.07109829783439636, 'timestamp': '2025-09-10 02:54:29.156063', 'step': 15264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:29.209562', 'step': 15264, 'epoch': 3} {'type': 'loss', 'content': 0.13468649983406067, 'timestamp': '2025-09-10 02:54:29.213149', 'step': 15265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:29.270024', 'step': 15265, 'epoch': 3} {'type': 'loss', 'content': 0.13798223435878754, 'timestamp': '2025-09-10 02:54:29.272118', 'step': 15266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:29.326009', 'step': 15266, 'epoch': 3} {'type': 'loss', 'content': 0.1898735761642456, 'timestamp': '2025-09-10 02:54:29.328286', 'step': 15267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:29.382299', 'step': 15267, 'epoch': 3} {'type': 'loss', 'content': 0.07158654183149338, 'timestamp': '2025-09-10 02:54:29.388621', 'step': 15268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:29.442502', 'step': 15268, 'epoch': 3} {'type': 'loss', 'content': 0.07864972203969955, 'timestamp': '2025-09-10 02:54:29.444686', 'step': 15269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:29.503069', 'step': 15269, 'epoch': 3} {'type': 'loss', 'content': 0.08709164708852768, 'timestamp': '2025-09-10 02:54:29.505262', 'step': 15270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:29.560383', 'step': 15270, 'epoch': 3} {'type': 'loss', 'content': 0.10276127606630325, 'timestamp': '2025-09-10 02:54:29.562564', 'step': 15271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:29.617504', 'step': 15271, 'epoch': 3} {'type': 'loss', 'content': 0.1362224966287613, 'timestamp': '2025-09-10 02:54:29.623504', 'step': 15272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:29.678222', 'step': 15272, 'epoch': 3} {'type': 'loss', 'content': 0.07522295415401459, 'timestamp': '2025-09-10 02:54:29.680438', 'step': 15273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:29.734681', 'step': 15273, 'epoch': 3} {'type': 'loss', 'content': 0.07051835209131241, 'timestamp': '2025-09-10 02:54:29.736845', 'step': 15274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:29.790912', 'step': 15274, 'epoch': 3} {'type': 'loss', 'content': 0.028225069865584373, 'timestamp': '2025-09-10 02:54:29.793116', 'step': 15275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:29.846753', 'step': 15275, 'epoch': 3} {'type': 'loss', 'content': 0.2247823029756546, 'timestamp': '2025-09-10 02:54:29.852731', 'step': 15276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:29.907010', 'step': 15276, 'epoch': 3} {'type': 'loss', 'content': 0.13075540959835052, 'timestamp': '2025-09-10 02:54:29.909181', 'step': 15277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:29.966089', 'step': 15277, 'epoch': 3} {'type': 'loss', 'content': 0.06265772134065628, 'timestamp': '2025-09-10 02:54:29.968235', 'step': 15278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:30.022991', 'step': 15278, 'epoch': 3} {'type': 'loss', 'content': 0.1037389263510704, 'timestamp': '2025-09-10 02:54:30.025336', 'step': 15279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:30.082758', 'step': 15279, 'epoch': 3} {'type': 'loss', 'content': 0.15852491557598114, 'timestamp': '2025-09-10 02:54:30.089007', 'step': 15280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:30.145164', 'step': 15280, 'epoch': 3} {'type': 'loss', 'content': 0.06674659997224808, 'timestamp': '2025-09-10 02:54:30.147558', 'step': 15281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:30.242146', 'step': 15281, 'epoch': 3} {'type': 'loss', 'content': 0.09286913275718689, 'timestamp': '2025-09-10 02:54:30.244400', 'step': 15282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:30.333683', 'step': 15282, 'epoch': 3} {'type': 'loss', 'content': 0.04801134020090103, 'timestamp': '2025-09-10 02:54:30.335852', 'step': 15283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:30.395342', 'step': 15283, 'epoch': 3} {'type': 'loss', 'content': 0.06091592460870743, 'timestamp': '2025-09-10 02:54:30.401763', 'step': 15284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:30.465776', 'step': 15284, 'epoch': 3} {'type': 'loss', 'content': 0.15881286561489105, 'timestamp': '2025-09-10 02:54:30.467874', 'step': 15285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:30.538714', 'step': 15285, 'epoch': 3} {'type': 'loss', 'content': 0.062385693192481995, 'timestamp': '2025-09-10 02:54:30.540954', 'step': 15286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:30.617884', 'step': 15286, 'epoch': 3} {'type': 'loss', 'content': 0.2526751458644867, 'timestamp': '2025-09-10 02:54:30.620190', 'step': 15287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:30.695618', 'step': 15287, 'epoch': 3} {'type': 'loss', 'content': 0.08931326121091843, 'timestamp': '2025-09-10 02:54:30.701954', 'step': 15288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:30.768715', 'step': 15288, 'epoch': 3} {'type': 'loss', 'content': 0.12263141572475433, 'timestamp': '2025-09-10 02:54:30.770949', 'step': 15289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:30.830678', 'step': 15289, 'epoch': 3} {'type': 'loss', 'content': 0.06537970155477524, 'timestamp': '2025-09-10 02:54:30.833005', 'step': 15290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:30.923468', 'step': 15290, 'epoch': 3} {'type': 'loss', 'content': 0.1010601744055748, 'timestamp': '2025-09-10 02:54:30.925426', 'step': 15291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:31.014766', 'step': 15291, 'epoch': 3} {'type': 'loss', 'content': 0.14200496673583984, 'timestamp': '2025-09-10 02:54:31.021027', 'step': 15292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:31.088155', 'step': 15292, 'epoch': 3} {'type': 'loss', 'content': 0.08383367955684662, 'timestamp': '2025-09-10 02:54:31.093051', 'step': 15293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:31.179810', 'step': 15293, 'epoch': 3} {'type': 'loss', 'content': 0.10398344695568085, 'timestamp': '2025-09-10 02:54:31.182286', 'step': 15294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:31.271860', 'step': 15294, 'epoch': 3} {'type': 'loss', 'content': 0.09473073482513428, 'timestamp': '2025-09-10 02:54:31.274113', 'step': 15295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:31.362105', 'step': 15295, 'epoch': 3} {'type': 'loss', 'content': 0.0655822679400444, 'timestamp': '2025-09-10 02:54:31.368444', 'step': 15296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:31.433492', 'step': 15296, 'epoch': 3} {'type': 'loss', 'content': 0.12538304924964905, 'timestamp': '2025-09-10 02:54:31.435702', 'step': 15297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:31.493674', 'step': 15297, 'epoch': 3} {'type': 'loss', 'content': 0.0656861811876297, 'timestamp': '2025-09-10 02:54:31.495881', 'step': 15298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:31.549945', 'step': 15298, 'epoch': 3} {'type': 'loss', 'content': 0.12301022559404373, 'timestamp': '2025-09-10 02:54:31.552123', 'step': 15299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:31.615332', 'step': 15299, 'epoch': 3} {'type': 'loss', 'content': 0.22529782354831696, 'timestamp': '2025-09-10 02:54:31.621250', 'step': 15300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:31.674621', 'step': 15300, 'epoch': 3} {'type': 'loss', 'content': 0.14790120720863342, 'timestamp': '2025-09-10 02:54:31.676602', 'step': 15301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:31.730507', 'step': 15301, 'epoch': 3} {'type': 'loss', 'content': 0.2252863198518753, 'timestamp': '2025-09-10 02:54:31.732654', 'step': 15302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:31.786856', 'step': 15302, 'epoch': 3} {'type': 'loss', 'content': 0.06501297652721405, 'timestamp': '2025-09-10 02:54:31.789145', 'step': 15303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:31.847674', 'step': 15303, 'epoch': 3} {'type': 'loss', 'content': 0.09982641786336899, 'timestamp': '2025-09-10 02:54:31.853769', 'step': 15304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:31.911846', 'step': 15304, 'epoch': 3} {'type': 'loss', 'content': 0.08085323125123978, 'timestamp': '2025-09-10 02:54:31.914031', 'step': 15305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:31.968656', 'step': 15305, 'epoch': 3} {'type': 'loss', 'content': 0.04217533767223358, 'timestamp': '2025-09-10 02:54:31.970913', 'step': 15306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:32.026080', 'step': 15306, 'epoch': 3} {'type': 'loss', 'content': 0.03223733231425285, 'timestamp': '2025-09-10 02:54:32.028574', 'step': 15307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:32.088862', 'step': 15307, 'epoch': 3} {'type': 'loss', 'content': 0.17252899706363678, 'timestamp': '2025-09-10 02:54:32.095270', 'step': 15308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:32.156398', 'step': 15308, 'epoch': 3} {'type': 'loss', 'content': 0.1415262222290039, 'timestamp': '2025-09-10 02:54:32.158571', 'step': 15309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:32.215902', 'step': 15309, 'epoch': 3} {'type': 'loss', 'content': 0.09480675309896469, 'timestamp': '2025-09-10 02:54:32.218186', 'step': 15310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:32.272315', 'step': 15310, 'epoch': 3} {'type': 'loss', 'content': 0.11068835854530334, 'timestamp': '2025-09-10 02:54:32.275012', 'step': 15311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:32.329970', 'step': 15311, 'epoch': 3} {'type': 'loss', 'content': 0.11639906466007233, 'timestamp': '2025-09-10 02:54:32.335992', 'step': 15312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:32.390004', 'step': 15312, 'epoch': 3} {'type': 'loss', 'content': 0.1027451753616333, 'timestamp': '2025-09-10 02:54:32.392229', 'step': 15313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:32.446864', 'step': 15313, 'epoch': 3} {'type': 'loss', 'content': 0.07109849154949188, 'timestamp': '2025-09-10 02:54:32.449100', 'step': 15314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:32.505451', 'step': 15314, 'epoch': 3} {'type': 'loss', 'content': 0.19880323112010956, 'timestamp': '2025-09-10 02:54:32.508073', 'step': 15315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:32.566804', 'step': 15315, 'epoch': 3} {'type': 'loss', 'content': 0.06827782094478607, 'timestamp': '2025-09-10 02:54:32.573013', 'step': 15316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:32.627886', 'step': 15316, 'epoch': 3} {'type': 'loss', 'content': 0.08990534394979477, 'timestamp': '2025-09-10 02:54:32.630251', 'step': 15317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:32.685366', 'step': 15317, 'epoch': 3} {'type': 'loss', 'content': 0.19882111251354218, 'timestamp': '2025-09-10 02:54:32.687619', 'step': 15318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:32.742855', 'step': 15318, 'epoch': 3} {'type': 'loss', 'content': 0.08536739647388458, 'timestamp': '2025-09-10 02:54:32.745283', 'step': 15319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:32.799811', 'step': 15319, 'epoch': 3} {'type': 'loss', 'content': 0.12854766845703125, 'timestamp': '2025-09-10 02:54:32.806011', 'step': 15320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:32.860928', 'step': 15320, 'epoch': 3} {'type': 'loss', 'content': 0.10842981934547424, 'timestamp': '2025-09-10 02:54:32.863367', 'step': 15321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:32.917482', 'step': 15321, 'epoch': 3} {'type': 'loss', 'content': 0.10270240902900696, 'timestamp': '2025-09-10 02:54:32.919663', 'step': 15322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:32.974392', 'step': 15322, 'epoch': 3} {'type': 'loss', 'content': 0.12903162837028503, 'timestamp': '2025-09-10 02:54:32.976811', 'step': 15323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:33.031169', 'step': 15323, 'epoch': 3} {'type': 'loss', 'content': 0.04900648817420006, 'timestamp': '2025-09-10 02:54:33.037188', 'step': 15324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:33.091564', 'step': 15324, 'epoch': 3} {'type': 'loss', 'content': 0.12872257828712463, 'timestamp': '2025-09-10 02:54:33.093694', 'step': 15325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:33.147887', 'step': 15325, 'epoch': 3} {'type': 'loss', 'content': 0.08702017366886139, 'timestamp': '2025-09-10 02:54:33.150089', 'step': 15326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:33.206383', 'step': 15326, 'epoch': 3} {'type': 'loss', 'content': 0.05663292855024338, 'timestamp': '2025-09-10 02:54:33.208664', 'step': 15327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:33.264483', 'step': 15327, 'epoch': 3} {'type': 'loss', 'content': 0.08735693246126175, 'timestamp': '2025-09-10 02:54:33.271040', 'step': 15328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:33.325638', 'step': 15328, 'epoch': 3} {'type': 'loss', 'content': 0.08844584971666336, 'timestamp': '2025-09-10 02:54:33.327852', 'step': 15329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:33.382925', 'step': 15329, 'epoch': 3} {'type': 'loss', 'content': 0.11176259070634842, 'timestamp': '2025-09-10 02:54:33.385167', 'step': 15330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:33.439906', 'step': 15330, 'epoch': 3} {'type': 'loss', 'content': 0.19830076396465302, 'timestamp': '2025-09-10 02:54:33.442095', 'step': 15331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:33.496358', 'step': 15331, 'epoch': 3} {'type': 'loss', 'content': 0.13832460343837738, 'timestamp': '2025-09-10 02:54:33.502528', 'step': 15332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:33.556381', 'step': 15332, 'epoch': 3} {'type': 'loss', 'content': 0.1104375571012497, 'timestamp': '2025-09-10 02:54:33.558628', 'step': 15333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:33.612595', 'step': 15333, 'epoch': 3} {'type': 'loss', 'content': 0.06115642562508583, 'timestamp': '2025-09-10 02:54:33.614912', 'step': 15334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:33.669856', 'step': 15334, 'epoch': 3} {'type': 'loss', 'content': 0.053010243922472, 'timestamp': '2025-09-10 02:54:33.672126', 'step': 15335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:33.726646', 'step': 15335, 'epoch': 3} {'type': 'loss', 'content': 0.027587251737713814, 'timestamp': '2025-09-10 02:54:33.732960', 'step': 15336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:33.787425', 'step': 15336, 'epoch': 3} {'type': 'loss', 'content': 0.05471644178032875, 'timestamp': '2025-09-10 02:54:33.789878', 'step': 15337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:33.845347', 'step': 15337, 'epoch': 3} {'type': 'loss', 'content': 0.09445367753505707, 'timestamp': '2025-09-10 02:54:33.847572', 'step': 15338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:33.902073', 'step': 15338, 'epoch': 3} {'type': 'loss', 'content': 0.07170949876308441, 'timestamp': '2025-09-10 02:54:33.904500', 'step': 15339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:33.958361', 'step': 15339, 'epoch': 3} {'type': 'loss', 'content': 0.08814936131238937, 'timestamp': '2025-09-10 02:54:33.964708', 'step': 15340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:34.021017', 'step': 15340, 'epoch': 3} {'type': 'loss', 'content': 0.06974674016237259, 'timestamp': '2025-09-10 02:54:34.023051', 'step': 15341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:34.078787', 'step': 15341, 'epoch': 3} {'type': 'loss', 'content': 0.17012442648410797, 'timestamp': '2025-09-10 02:54:34.081089', 'step': 15342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:34.137052', 'step': 15342, 'epoch': 3} {'type': 'loss', 'content': 0.11255180835723877, 'timestamp': '2025-09-10 02:54:34.139211', 'step': 15343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:34.193820', 'step': 15343, 'epoch': 3} {'type': 'loss', 'content': 0.11392819136381149, 'timestamp': '2025-09-10 02:54:34.199871', 'step': 15344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:34.254211', 'step': 15344, 'epoch': 3} {'type': 'loss', 'content': 0.06264813989400864, 'timestamp': '2025-09-10 02:54:34.256290', 'step': 15345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:34.314109', 'step': 15345, 'epoch': 3} {'type': 'loss', 'content': 0.09947773814201355, 'timestamp': '2025-09-10 02:54:34.316283', 'step': 15346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:34.370747', 'step': 15346, 'epoch': 3} {'type': 'loss', 'content': 0.16035127639770508, 'timestamp': '2025-09-10 02:54:34.372901', 'step': 15347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:34.428611', 'step': 15347, 'epoch': 3} {'type': 'loss', 'content': 0.14063523709774017, 'timestamp': '2025-09-10 02:54:34.434746', 'step': 15348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:34.488973', 'step': 15348, 'epoch': 3} {'type': 'loss', 'content': 0.06158698722720146, 'timestamp': '2025-09-10 02:54:34.491144', 'step': 15349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:34.546064', 'step': 15349, 'epoch': 3} {'type': 'loss', 'content': 0.009303334169089794, 'timestamp': '2025-09-10 02:54:34.548374', 'step': 15350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:34.602909', 'step': 15350, 'epoch': 3} {'type': 'loss', 'content': 0.0839502289891243, 'timestamp': '2025-09-10 02:54:34.605012', 'step': 15351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:34.659518', 'step': 15351, 'epoch': 3} {'type': 'loss', 'content': 0.08911732584238052, 'timestamp': '2025-09-10 02:54:34.665531', 'step': 15352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:34.719167', 'step': 15352, 'epoch': 3} {'type': 'loss', 'content': 0.08842617273330688, 'timestamp': '2025-09-10 02:54:34.721495', 'step': 15353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:34.775628', 'step': 15353, 'epoch': 3} {'type': 'loss', 'content': 0.05955187976360321, 'timestamp': '2025-09-10 02:54:34.777786', 'step': 15354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:34.834050', 'step': 15354, 'epoch': 3} {'type': 'loss', 'content': 0.1790798157453537, 'timestamp': '2025-09-10 02:54:34.836147', 'step': 15355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:34.890720', 'step': 15355, 'epoch': 3} {'type': 'loss', 'content': 0.04632692039012909, 'timestamp': '2025-09-10 02:54:34.896962', 'step': 15356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:34.950682', 'step': 15356, 'epoch': 3} {'type': 'loss', 'content': 0.13540419936180115, 'timestamp': '2025-09-10 02:54:34.953072', 'step': 15357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:35.006887', 'step': 15357, 'epoch': 3} {'type': 'loss', 'content': 0.15203453600406647, 'timestamp': '2025-09-10 02:54:35.008873', 'step': 15358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:35.065780', 'step': 15358, 'epoch': 3} {'type': 'loss', 'content': 0.05433947592973709, 'timestamp': '2025-09-10 02:54:35.067987', 'step': 15359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:35.123585', 'step': 15359, 'epoch': 3} {'type': 'loss', 'content': 0.14247743785381317, 'timestamp': '2025-09-10 02:54:35.129642', 'step': 15360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:35.182826', 'step': 15360, 'epoch': 3} {'type': 'loss', 'content': 0.07426757365465164, 'timestamp': '2025-09-10 02:54:35.184940', 'step': 15361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:35.238659', 'step': 15361, 'epoch': 3} {'type': 'loss', 'content': 0.04105079919099808, 'timestamp': '2025-09-10 02:54:35.240773', 'step': 15362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:35.294715', 'step': 15362, 'epoch': 3} {'type': 'loss', 'content': 0.13769195973873138, 'timestamp': '2025-09-10 02:54:35.296945', 'step': 15363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:35.351049', 'step': 15363, 'epoch': 3} {'type': 'loss', 'content': 0.15919670462608337, 'timestamp': '2025-09-10 02:54:35.357046', 'step': 15364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:35.412467', 'step': 15364, 'epoch': 3} {'type': 'loss', 'content': 0.11935998499393463, 'timestamp': '2025-09-10 02:54:35.414557', 'step': 15365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:35.468560', 'step': 15365, 'epoch': 3} {'type': 'loss', 'content': 0.15438079833984375, 'timestamp': '2025-09-10 02:54:35.470994', 'step': 15366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:35.524855', 'step': 15366, 'epoch': 3} {'type': 'loss', 'content': 0.14610959589481354, 'timestamp': '2025-09-10 02:54:35.527004', 'step': 15367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:35.581725', 'step': 15367, 'epoch': 3} {'type': 'loss', 'content': 0.0861893817782402, 'timestamp': '2025-09-10 02:54:35.587760', 'step': 15368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:35.642003', 'step': 15368, 'epoch': 3} {'type': 'loss', 'content': 0.09059348702430725, 'timestamp': '2025-09-10 02:54:35.644088', 'step': 15369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:35.698643', 'step': 15369, 'epoch': 3} {'type': 'loss', 'content': 0.09575095772743225, 'timestamp': '2025-09-10 02:54:35.700778', 'step': 15370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:35.754201', 'step': 15370, 'epoch': 3} {'type': 'loss', 'content': 0.1286507397890091, 'timestamp': '2025-09-10 02:54:35.756415', 'step': 15371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:35.811269', 'step': 15371, 'epoch': 3} {'type': 'loss', 'content': 0.08927112072706223, 'timestamp': '2025-09-10 02:54:35.817248', 'step': 15372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:35.871297', 'step': 15372, 'epoch': 3} {'type': 'loss', 'content': 0.12102419137954712, 'timestamp': '2025-09-10 02:54:35.873429', 'step': 15373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:35.928577', 'step': 15373, 'epoch': 3} {'type': 'loss', 'content': 0.15860223770141602, 'timestamp': '2025-09-10 02:54:35.930832', 'step': 15374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:35.985782', 'step': 15374, 'epoch': 3} {'type': 'loss', 'content': 0.1266515702009201, 'timestamp': '2025-09-10 02:54:35.988007', 'step': 15375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:36.041823', 'step': 15375, 'epoch': 3} {'type': 'loss', 'content': 0.0660819485783577, 'timestamp': '2025-09-10 02:54:36.047855', 'step': 15376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:36.101505', 'step': 15376, 'epoch': 3} {'type': 'loss', 'content': 0.09289629012346268, 'timestamp': '2025-09-10 02:54:36.103704', 'step': 15377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:36.157836', 'step': 15377, 'epoch': 3} {'type': 'loss', 'content': 0.06928383558988571, 'timestamp': '2025-09-10 02:54:36.159984', 'step': 15378, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:54:49.088476', 'step': 15378, 'epoch': 3} {'type': 'pplx', 'content': 14062.2634962478, 'timestamp': '2025-09-10 02:54:49.091523', 'step': 15378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:49.146196', 'step': 15378, 'epoch': 3} {'type': 'loss', 'content': 0.08005087077617645, 'timestamp': '2025-09-10 02:54:49.147901', 'step': 15379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:49.201988', 'step': 15379, 'epoch': 3} {'type': 'loss', 'content': 0.20446109771728516, 'timestamp': '2025-09-10 02:54:49.207731', 'step': 15380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:49.260135', 'step': 15380, 'epoch': 3} {'type': 'loss', 'content': 0.11858443915843964, 'timestamp': '2025-09-10 02:54:49.262108', 'step': 15381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:49.316619', 'step': 15381, 'epoch': 3} {'type': 'loss', 'content': 0.15984471142292023, 'timestamp': '2025-09-10 02:54:49.318595', 'step': 15382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:49.374732', 'step': 15382, 'epoch': 3} {'type': 'loss', 'content': 0.07309134304523468, 'timestamp': '2025-09-10 02:54:49.376630', 'step': 15383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:49.433542', 'step': 15383, 'epoch': 3} {'type': 'loss', 'content': 0.13406087458133698, 'timestamp': '2025-09-10 02:54:49.439519', 'step': 15384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:49.493897', 'step': 15384, 'epoch': 3} {'type': 'loss', 'content': 0.05813562497496605, 'timestamp': '2025-09-10 02:54:49.495960', 'step': 15385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:49.551760', 'step': 15385, 'epoch': 3} {'type': 'loss', 'content': 0.08280301094055176, 'timestamp': '2025-09-10 02:54:49.553470', 'step': 15386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:49.609215', 'step': 15386, 'epoch': 3} {'type': 'loss', 'content': 0.05123203992843628, 'timestamp': '2025-09-10 02:54:49.611092', 'step': 15387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:49.666740', 'step': 15387, 'epoch': 3} {'type': 'loss', 'content': 0.13302160799503326, 'timestamp': '2025-09-10 02:54:49.672692', 'step': 15388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:49.727481', 'step': 15388, 'epoch': 3} {'type': 'loss', 'content': 0.11652708798646927, 'timestamp': '2025-09-10 02:54:49.729380', 'step': 15389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:49.784009', 'step': 15389, 'epoch': 3} {'type': 'loss', 'content': 0.048584017902612686, 'timestamp': '2025-09-10 02:54:49.785770', 'step': 15390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:49.843440', 'step': 15390, 'epoch': 3} {'type': 'loss', 'content': 0.19995056092739105, 'timestamp': '2025-09-10 02:54:49.845445', 'step': 15391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:49.903014', 'step': 15391, 'epoch': 3} {'type': 'loss', 'content': 0.08632528781890869, 'timestamp': '2025-09-10 02:54:49.909253', 'step': 15392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:49.964837', 'step': 15392, 'epoch': 3} {'type': 'loss', 'content': 0.06987202167510986, 'timestamp': '2025-09-10 02:54:49.966860', 'step': 15393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:50.023533', 'step': 15393, 'epoch': 3} {'type': 'loss', 'content': 0.037188854068517685, 'timestamp': '2025-09-10 02:54:50.025558', 'step': 15394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:50.080115', 'step': 15394, 'epoch': 3} {'type': 'loss', 'content': 0.1666647046804428, 'timestamp': '2025-09-10 02:54:50.081882', 'step': 15395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:50.137091', 'step': 15395, 'epoch': 3} {'type': 'loss', 'content': 0.06961327791213989, 'timestamp': '2025-09-10 02:54:50.142948', 'step': 15396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:50.197872', 'step': 15396, 'epoch': 3} {'type': 'loss', 'content': 0.10050952434539795, 'timestamp': '2025-09-10 02:54:50.201410', 'step': 15397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:50.259971', 'step': 15397, 'epoch': 3} {'type': 'loss', 'content': 0.08936595916748047, 'timestamp': '2025-09-10 02:54:50.262018', 'step': 15398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:50.319065', 'step': 15398, 'epoch': 3} {'type': 'loss', 'content': 0.10866527259349823, 'timestamp': '2025-09-10 02:54:50.321159', 'step': 15399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:50.378573', 'step': 15399, 'epoch': 3} {'type': 'loss', 'content': 0.2282385677099228, 'timestamp': '2025-09-10 02:54:50.384750', 'step': 15400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:50.441431', 'step': 15400, 'epoch': 3} {'type': 'loss', 'content': 0.1442221999168396, 'timestamp': '2025-09-10 02:54:50.443387', 'step': 15401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:50.499931', 'step': 15401, 'epoch': 3} {'type': 'loss', 'content': 0.10583442449569702, 'timestamp': '2025-09-10 02:54:50.502074', 'step': 15402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:50.558390', 'step': 15402, 'epoch': 3} {'type': 'loss', 'content': 0.07806979864835739, 'timestamp': '2025-09-10 02:54:50.560184', 'step': 15403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:50.615276', 'step': 15403, 'epoch': 3} {'type': 'loss', 'content': 0.12025255709886551, 'timestamp': '2025-09-10 02:54:50.621007', 'step': 15404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:50.674959', 'step': 15404, 'epoch': 3} {'type': 'loss', 'content': 0.08420788496732712, 'timestamp': '2025-09-10 02:54:50.676663', 'step': 15405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:50.731891', 'step': 15405, 'epoch': 3} {'type': 'loss', 'content': 0.0656496211886406, 'timestamp': '2025-09-10 02:54:50.733597', 'step': 15406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:50.788351', 'step': 15406, 'epoch': 3} {'type': 'loss', 'content': 0.023386498913168907, 'timestamp': '2025-09-10 02:54:50.790371', 'step': 15407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:50.844332', 'step': 15407, 'epoch': 3} {'type': 'loss', 'content': 0.13279405236244202, 'timestamp': '2025-09-10 02:54:50.851419', 'step': 15408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:50.906408', 'step': 15408, 'epoch': 3} {'type': 'loss', 'content': 0.1013755276799202, 'timestamp': '2025-09-10 02:54:50.908370', 'step': 15409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:50.968012', 'step': 15409, 'epoch': 3} {'type': 'loss', 'content': 0.12928089499473572, 'timestamp': '2025-09-10 02:54:50.970059', 'step': 15410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:51.026662', 'step': 15410, 'epoch': 3} {'type': 'loss', 'content': 0.13931907713413239, 'timestamp': '2025-09-10 02:54:51.028761', 'step': 15411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:51.082994', 'step': 15411, 'epoch': 3} {'type': 'loss', 'content': 0.04891400411725044, 'timestamp': '2025-09-10 02:54:51.088755', 'step': 15412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:51.143295', 'step': 15412, 'epoch': 3} {'type': 'loss', 'content': 0.08988183736801147, 'timestamp': '2025-09-10 02:54:51.144961', 'step': 15413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:51.200664', 'step': 15413, 'epoch': 3} {'type': 'loss', 'content': 0.12166746705770493, 'timestamp': '2025-09-10 02:54:51.202779', 'step': 15414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:51.257848', 'step': 15414, 'epoch': 3} {'type': 'loss', 'content': 0.14426711201667786, 'timestamp': '2025-09-10 02:54:51.259803', 'step': 15415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:51.315130', 'step': 15415, 'epoch': 3} {'type': 'loss', 'content': 0.1582488715648651, 'timestamp': '2025-09-10 02:54:51.321310', 'step': 15416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:51.377211', 'step': 15416, 'epoch': 3} {'type': 'loss', 'content': 0.1013355702161789, 'timestamp': '2025-09-10 02:54:51.379323', 'step': 15417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:51.436215', 'step': 15417, 'epoch': 3} {'type': 'loss', 'content': 0.0993189588189125, 'timestamp': '2025-09-10 02:54:51.438191', 'step': 15418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:51.495200', 'step': 15418, 'epoch': 3} {'type': 'loss', 'content': 0.03930934518575668, 'timestamp': '2025-09-10 02:54:51.496922', 'step': 15419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:51.554706', 'step': 15419, 'epoch': 3} {'type': 'loss', 'content': 0.04651060700416565, 'timestamp': '2025-09-10 02:54:51.560482', 'step': 15420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:51.615955', 'step': 15420, 'epoch': 3} {'type': 'loss', 'content': 0.03659677132964134, 'timestamp': '2025-09-10 02:54:51.617669', 'step': 15421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:51.672806', 'step': 15421, 'epoch': 3} {'type': 'loss', 'content': 0.1373109668493271, 'timestamp': '2025-09-10 02:54:51.676341', 'step': 15422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:51.732237', 'step': 15422, 'epoch': 3} {'type': 'loss', 'content': 0.07959520071744919, 'timestamp': '2025-09-10 02:54:51.734272', 'step': 15423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:51.789394', 'step': 15423, 'epoch': 3} {'type': 'loss', 'content': 0.08657287061214447, 'timestamp': '2025-09-10 02:54:51.795366', 'step': 15424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:51.852237', 'step': 15424, 'epoch': 3} {'type': 'loss', 'content': 0.0709160715341568, 'timestamp': '2025-09-10 02:54:51.854171', 'step': 15425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:51.910131', 'step': 15425, 'epoch': 3} {'type': 'loss', 'content': 0.11243480443954468, 'timestamp': '2025-09-10 02:54:51.912083', 'step': 15426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:51.967627', 'step': 15426, 'epoch': 3} {'type': 'loss', 'content': 0.14615975320339203, 'timestamp': '2025-09-10 02:54:51.969670', 'step': 15427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:52.027007', 'step': 15427, 'epoch': 3} {'type': 'loss', 'content': 0.06664809584617615, 'timestamp': '2025-09-10 02:54:52.033164', 'step': 15428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:52.087677', 'step': 15428, 'epoch': 3} {'type': 'loss', 'content': 0.08162175863981247, 'timestamp': '2025-09-10 02:54:52.089629', 'step': 15429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:52.145281', 'step': 15429, 'epoch': 3} {'type': 'loss', 'content': 0.04222509637475014, 'timestamp': '2025-09-10 02:54:52.147270', 'step': 15430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:52.204584', 'step': 15430, 'epoch': 3} {'type': 'loss', 'content': 0.049488287419080734, 'timestamp': '2025-09-10 02:54:52.206675', 'step': 15431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:52.260548', 'step': 15431, 'epoch': 3} {'type': 'loss', 'content': 0.06301819533109665, 'timestamp': '2025-09-10 02:54:52.266739', 'step': 15432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:52.321759', 'step': 15432, 'epoch': 3} {'type': 'loss', 'content': 0.18046611547470093, 'timestamp': '2025-09-10 02:54:52.323718', 'step': 15433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:52.379020', 'step': 15433, 'epoch': 3} {'type': 'loss', 'content': 0.14815416932106018, 'timestamp': '2025-09-10 02:54:52.381067', 'step': 15434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:52.435898', 'step': 15434, 'epoch': 3} {'type': 'loss', 'content': 0.07168373465538025, 'timestamp': '2025-09-10 02:54:52.437898', 'step': 15435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:52.492383', 'step': 15435, 'epoch': 3} {'type': 'loss', 'content': 0.09070923179388046, 'timestamp': '2025-09-10 02:54:52.499329', 'step': 15436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:52.554296', 'step': 15436, 'epoch': 3} {'type': 'loss', 'content': 0.15479283034801483, 'timestamp': '2025-09-10 02:54:52.556950', 'step': 15437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:52.612778', 'step': 15437, 'epoch': 3} {'type': 'loss', 'content': 0.061071448028087616, 'timestamp': '2025-09-10 02:54:52.614708', 'step': 15438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:52.671530', 'step': 15438, 'epoch': 3} {'type': 'loss', 'content': 0.1377599984407425, 'timestamp': '2025-09-10 02:54:52.673501', 'step': 15439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:52.729639', 'step': 15439, 'epoch': 3} {'type': 'loss', 'content': 0.10006128996610641, 'timestamp': '2025-09-10 02:54:52.735679', 'step': 15440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:52.789880', 'step': 15440, 'epoch': 3} {'type': 'loss', 'content': 0.07612378895282745, 'timestamp': '2025-09-10 02:54:52.792006', 'step': 15441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:52.848820', 'step': 15441, 'epoch': 3} {'type': 'loss', 'content': 0.22587299346923828, 'timestamp': '2025-09-10 02:54:52.850956', 'step': 15442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:52.906609', 'step': 15442, 'epoch': 3} {'type': 'loss', 'content': 0.09138669818639755, 'timestamp': '2025-09-10 02:54:52.908651', 'step': 15443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:52.963550', 'step': 15443, 'epoch': 3} {'type': 'loss', 'content': 0.018407553434371948, 'timestamp': '2025-09-10 02:54:52.969434', 'step': 15444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:53.022705', 'step': 15444, 'epoch': 3} {'type': 'loss', 'content': 0.05824526399374008, 'timestamp': '2025-09-10 02:54:53.028972', 'step': 15445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:53.084738', 'step': 15445, 'epoch': 3} {'type': 'loss', 'content': 0.1441696584224701, 'timestamp': '2025-09-10 02:54:53.086885', 'step': 15446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:53.145902', 'step': 15446, 'epoch': 3} {'type': 'loss', 'content': 0.12350356578826904, 'timestamp': '2025-09-10 02:54:53.148188', 'step': 15447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:53.202875', 'step': 15447, 'epoch': 3} {'type': 'loss', 'content': 0.1351391077041626, 'timestamp': '2025-09-10 02:54:53.211134', 'step': 15448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:53.273081', 'step': 15448, 'epoch': 3} {'type': 'loss', 'content': 0.1010453850030899, 'timestamp': '2025-09-10 02:54:53.275745', 'step': 15449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:53.333618', 'step': 15449, 'epoch': 3} {'type': 'loss', 'content': 0.13653603196144104, 'timestamp': '2025-09-10 02:54:53.335893', 'step': 15450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:53.398067', 'step': 15450, 'epoch': 3} {'type': 'loss', 'content': 0.08990875631570816, 'timestamp': '2025-09-10 02:54:53.400194', 'step': 15451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:53.454693', 'step': 15451, 'epoch': 3} {'type': 'loss', 'content': 0.0461982786655426, 'timestamp': '2025-09-10 02:54:53.460850', 'step': 15452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:53.514281', 'step': 15452, 'epoch': 3} {'type': 'loss', 'content': 0.10357370227575302, 'timestamp': '2025-09-10 02:54:53.516447', 'step': 15453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:53.570410', 'step': 15453, 'epoch': 3} {'type': 'loss', 'content': 0.09949879348278046, 'timestamp': '2025-09-10 02:54:53.572973', 'step': 15454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:53.640387', 'step': 15454, 'epoch': 3} {'type': 'loss', 'content': 0.1406865268945694, 'timestamp': '2025-09-10 02:54:53.642118', 'step': 15455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:53.696116', 'step': 15455, 'epoch': 3} {'type': 'loss', 'content': 0.16501028835773468, 'timestamp': '2025-09-10 02:54:53.703058', 'step': 15456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:53.758663', 'step': 15456, 'epoch': 3} {'type': 'loss', 'content': 0.06696785986423492, 'timestamp': '2025-09-10 02:54:53.760797', 'step': 15457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:53.815908', 'step': 15457, 'epoch': 3} {'type': 'loss', 'content': 0.04930423945188522, 'timestamp': '2025-09-10 02:54:53.818034', 'step': 15458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:53.873257', 'step': 15458, 'epoch': 3} {'type': 'loss', 'content': 0.17873498797416687, 'timestamp': '2025-09-10 02:54:53.876139', 'step': 15459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:53.931507', 'step': 15459, 'epoch': 3} {'type': 'loss', 'content': 0.11301865428686142, 'timestamp': '2025-09-10 02:54:53.937610', 'step': 15460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:53.990244', 'step': 15460, 'epoch': 3} {'type': 'loss', 'content': 0.08283352851867676, 'timestamp': '2025-09-10 02:54:53.992372', 'step': 15461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:54.046072', 'step': 15461, 'epoch': 3} {'type': 'loss', 'content': 0.09184541553258896, 'timestamp': '2025-09-10 02:54:54.047991', 'step': 15462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:54.100962', 'step': 15462, 'epoch': 3} {'type': 'loss', 'content': 0.06521657854318619, 'timestamp': '2025-09-10 02:54:54.102896', 'step': 15463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:54.156251', 'step': 15463, 'epoch': 3} {'type': 'loss', 'content': 0.04911601543426514, 'timestamp': '2025-09-10 02:54:54.162109', 'step': 15464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:54.214533', 'step': 15464, 'epoch': 3} {'type': 'loss', 'content': 0.10953249782323837, 'timestamp': '2025-09-10 02:54:54.216550', 'step': 15465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:54.269546', 'step': 15465, 'epoch': 3} {'type': 'loss', 'content': 0.10281340777873993, 'timestamp': '2025-09-10 02:54:54.271452', 'step': 15466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:54.324600', 'step': 15466, 'epoch': 3} {'type': 'loss', 'content': 0.0681358203291893, 'timestamp': '2025-09-10 02:54:54.326652', 'step': 15467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:54.379685', 'step': 15467, 'epoch': 3} {'type': 'loss', 'content': 0.17854851484298706, 'timestamp': '2025-09-10 02:54:54.385579', 'step': 15468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:54.437786', 'step': 15468, 'epoch': 3} {'type': 'loss', 'content': 0.13328388333320618, 'timestamp': '2025-09-10 02:54:54.439700', 'step': 15469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:54.493119', 'step': 15469, 'epoch': 3} {'type': 'loss', 'content': 0.054521456360816956, 'timestamp': '2025-09-10 02:54:54.495141', 'step': 15470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:54.549058', 'step': 15470, 'epoch': 3} {'type': 'loss', 'content': 0.0835360661149025, 'timestamp': '2025-09-10 02:54:54.551029', 'step': 15471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:54.603625', 'step': 15471, 'epoch': 3} {'type': 'loss', 'content': 0.14661164581775665, 'timestamp': '2025-09-10 02:54:54.609320', 'step': 15472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:54.661518', 'step': 15472, 'epoch': 3} {'type': 'loss', 'content': 0.09837903827428818, 'timestamp': '2025-09-10 02:54:54.663439', 'step': 15473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:54.717082', 'step': 15473, 'epoch': 3} {'type': 'loss', 'content': 0.10176485031843185, 'timestamp': '2025-09-10 02:54:54.719152', 'step': 15474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:54.772703', 'step': 15474, 'epoch': 3} {'type': 'loss', 'content': 0.11210791766643524, 'timestamp': '2025-09-10 02:54:54.774822', 'step': 15475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:54.827372', 'step': 15475, 'epoch': 3} {'type': 'loss', 'content': 0.09288916736841202, 'timestamp': '2025-09-10 02:54:54.833080', 'step': 15476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:54.886474', 'step': 15476, 'epoch': 3} {'type': 'loss', 'content': 0.06366012990474701, 'timestamp': '2025-09-10 02:54:54.888526', 'step': 15477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:54.941796', 'step': 15477, 'epoch': 3} {'type': 'loss', 'content': 0.05368833243846893, 'timestamp': '2025-09-10 02:54:54.943715', 'step': 15478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:54.996665', 'step': 15478, 'epoch': 3} {'type': 'loss', 'content': 0.08308002352714539, 'timestamp': '2025-09-10 02:54:54.998597', 'step': 15479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:55.051212', 'step': 15479, 'epoch': 3} {'type': 'loss', 'content': 0.06151255965232849, 'timestamp': '2025-09-10 02:54:55.056833', 'step': 15480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:55.109731', 'step': 15480, 'epoch': 3} {'type': 'loss', 'content': 0.13315153121948242, 'timestamp': '2025-09-10 02:54:55.111648', 'step': 15481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:55.164914', 'step': 15481, 'epoch': 3} {'type': 'loss', 'content': 0.06975749880075455, 'timestamp': '2025-09-10 02:54:55.166819', 'step': 15482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:55.220540', 'step': 15482, 'epoch': 3} {'type': 'loss', 'content': 0.09730403125286102, 'timestamp': '2025-09-10 02:54:55.222461', 'step': 15483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:55.276852', 'step': 15483, 'epoch': 3} {'type': 'loss', 'content': 0.17528435587882996, 'timestamp': '2025-09-10 02:54:55.282634', 'step': 15484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:55.336379', 'step': 15484, 'epoch': 3} {'type': 'loss', 'content': 0.1530158817768097, 'timestamp': '2025-09-10 02:54:55.343612', 'step': 15485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:55.399987', 'step': 15485, 'epoch': 3} {'type': 'loss', 'content': 0.11279058456420898, 'timestamp': '2025-09-10 02:54:55.402064', 'step': 15486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:55.455668', 'step': 15486, 'epoch': 3} {'type': 'loss', 'content': 0.15859495103359222, 'timestamp': '2025-09-10 02:54:55.457677', 'step': 15487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:55.510416', 'step': 15487, 'epoch': 3} {'type': 'loss', 'content': 0.1031160056591034, 'timestamp': '2025-09-10 02:54:55.516199', 'step': 15488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:55.568895', 'step': 15488, 'epoch': 3} {'type': 'loss', 'content': 0.0878998190164566, 'timestamp': '2025-09-10 02:54:55.571315', 'step': 15489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:55.625176', 'step': 15489, 'epoch': 3} {'type': 'loss', 'content': 0.1534954458475113, 'timestamp': '2025-09-10 02:54:55.627209', 'step': 15490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:55.681108', 'step': 15490, 'epoch': 3} {'type': 'loss', 'content': 0.10980448871850967, 'timestamp': '2025-09-10 02:54:55.683056', 'step': 15491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:55.736228', 'step': 15491, 'epoch': 3} {'type': 'loss', 'content': 0.0514359287917614, 'timestamp': '2025-09-10 02:54:55.742160', 'step': 15492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:55.794934', 'step': 15492, 'epoch': 3} {'type': 'loss', 'content': 0.057577334344387054, 'timestamp': '2025-09-10 02:54:55.796871', 'step': 15493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:55.850653', 'step': 15493, 'epoch': 3} {'type': 'loss', 'content': 0.26344576478004456, 'timestamp': '2025-09-10 02:54:55.852692', 'step': 15494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:55.905951', 'step': 15494, 'epoch': 3} {'type': 'loss', 'content': 0.033504974097013474, 'timestamp': '2025-09-10 02:54:55.908014', 'step': 15495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:55.960773', 'step': 15495, 'epoch': 3} {'type': 'loss', 'content': 0.08815018832683563, 'timestamp': '2025-09-10 02:54:55.973154', 'step': 15496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:56.028602', 'step': 15496, 'epoch': 3} {'type': 'loss', 'content': 0.03409437835216522, 'timestamp': '2025-09-10 02:54:56.030620', 'step': 15497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:56.083313', 'step': 15497, 'epoch': 3} {'type': 'loss', 'content': 0.1547728180885315, 'timestamp': '2025-09-10 02:54:56.092473', 'step': 15498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:56.147872', 'step': 15498, 'epoch': 3} {'type': 'loss', 'content': 0.13776558637619019, 'timestamp': '2025-09-10 02:54:56.149884', 'step': 15499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:56.203349', 'step': 15499, 'epoch': 3} {'type': 'loss', 'content': 0.12982122600078583, 'timestamp': '2025-09-10 02:54:56.209111', 'step': 15500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 15500', 'timestamp': '2025-09-10 02:54:56.618181', 'step': 15500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:56.676165', 'step': 15500, 'epoch': 3} {'type': 'loss', 'content': 0.05042504146695137, 'timestamp': '2025-09-10 02:54:56.679346', 'step': 15501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:56.734963', 'step': 15501, 'epoch': 3} {'type': 'loss', 'content': 0.07832934707403183, 'timestamp': '2025-09-10 02:54:56.737011', 'step': 15502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:56.791650', 'step': 15502, 'epoch': 3} {'type': 'loss', 'content': 0.07368872314691544, 'timestamp': '2025-09-10 02:54:56.794635', 'step': 15503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:56.849088', 'step': 15503, 'epoch': 3} {'type': 'loss', 'content': 0.14280320703983307, 'timestamp': '2025-09-10 02:54:56.855006', 'step': 15504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:56.907965', 'step': 15504, 'epoch': 3} {'type': 'loss', 'content': 0.12298363447189331, 'timestamp': '2025-09-10 02:54:56.910045', 'step': 15505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:56.967064', 'step': 15505, 'epoch': 3} {'type': 'loss', 'content': 0.07475458085536957, 'timestamp': '2025-09-10 02:54:56.973452', 'step': 15506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:57.035847', 'step': 15506, 'epoch': 3} {'type': 'loss', 'content': 0.0993712767958641, 'timestamp': '2025-09-10 02:54:57.037804', 'step': 15507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:57.094656', 'step': 15507, 'epoch': 3} {'type': 'loss', 'content': 0.1147480234503746, 'timestamp': '2025-09-10 02:54:57.100457', 'step': 15508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:57.154299', 'step': 15508, 'epoch': 3} {'type': 'loss', 'content': 0.16218456625938416, 'timestamp': '2025-09-10 02:54:57.156350', 'step': 15509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:57.211873', 'step': 15509, 'epoch': 3} {'type': 'loss', 'content': 0.07520369440317154, 'timestamp': '2025-09-10 02:54:57.213814', 'step': 15510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:57.270227', 'step': 15510, 'epoch': 3} {'type': 'loss', 'content': 0.10247311741113663, 'timestamp': '2025-09-10 02:54:57.272249', 'step': 15511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:57.326830', 'step': 15511, 'epoch': 3} {'type': 'loss', 'content': 0.10212133079767227, 'timestamp': '2025-09-10 02:54:57.332557', 'step': 15512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:57.390653', 'step': 15512, 'epoch': 3} {'type': 'loss', 'content': 0.15356852114200592, 'timestamp': '2025-09-10 02:54:57.392629', 'step': 15513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:57.446506', 'step': 15513, 'epoch': 3} {'type': 'loss', 'content': 0.089389368891716, 'timestamp': '2025-09-10 02:54:57.449467', 'step': 15514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:57.504628', 'step': 15514, 'epoch': 3} {'type': 'loss', 'content': 0.11725634336471558, 'timestamp': '2025-09-10 02:54:57.506643', 'step': 15515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:57.559863', 'step': 15515, 'epoch': 3} {'type': 'loss', 'content': 0.20054444670677185, 'timestamp': '2025-09-10 02:54:57.565813', 'step': 15516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:57.619342', 'step': 15516, 'epoch': 3} {'type': 'loss', 'content': 0.11046340316534042, 'timestamp': '2025-09-10 02:54:57.621516', 'step': 15517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:57.675181', 'step': 15517, 'epoch': 3} {'type': 'loss', 'content': 0.10548719018697739, 'timestamp': '2025-09-10 02:54:57.677432', 'step': 15518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:57.731324', 'step': 15518, 'epoch': 3} {'type': 'loss', 'content': 0.11841722577810287, 'timestamp': '2025-09-10 02:54:57.733441', 'step': 15519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:57.786232', 'step': 15519, 'epoch': 3} {'type': 'loss', 'content': 0.10045120120048523, 'timestamp': '2025-09-10 02:54:57.791980', 'step': 15520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:57.844748', 'step': 15520, 'epoch': 3} {'type': 'loss', 'content': 0.038092441856861115, 'timestamp': '2025-09-10 02:54:57.846795', 'step': 15521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:57.900265', 'step': 15521, 'epoch': 3} {'type': 'loss', 'content': 0.1804209053516388, 'timestamp': '2025-09-10 02:54:57.902540', 'step': 15522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:57.955919', 'step': 15522, 'epoch': 3} {'type': 'loss', 'content': 0.13040299713611603, 'timestamp': '2025-09-10 02:54:57.957858', 'step': 15523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:58.013669', 'step': 15523, 'epoch': 3} {'type': 'loss', 'content': 0.11874062567949295, 'timestamp': '2025-09-10 02:54:58.019452', 'step': 15524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:58.073018', 'step': 15524, 'epoch': 3} {'type': 'loss', 'content': 0.08630203455686569, 'timestamp': '2025-09-10 02:54:58.075096', 'step': 15525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:58.128254', 'step': 15525, 'epoch': 3} {'type': 'loss', 'content': 0.06729407608509064, 'timestamp': '2025-09-10 02:54:58.130207', 'step': 15526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:58.183335', 'step': 15526, 'epoch': 3} {'type': 'loss', 'content': 0.048865221440792084, 'timestamp': '2025-09-10 02:54:58.185383', 'step': 15527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:58.238317', 'step': 15527, 'epoch': 3} {'type': 'loss', 'content': 0.10618826746940613, 'timestamp': '2025-09-10 02:54:58.244055', 'step': 15528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:58.296048', 'step': 15528, 'epoch': 3} {'type': 'loss', 'content': 0.024495599791407585, 'timestamp': '2025-09-10 02:54:58.297998', 'step': 15529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:58.351559', 'step': 15529, 'epoch': 3} {'type': 'loss', 'content': 0.11685501039028168, 'timestamp': '2025-09-10 02:54:58.353504', 'step': 15530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:58.407192', 'step': 15530, 'epoch': 3} {'type': 'loss', 'content': 0.1440427601337433, 'timestamp': '2025-09-10 02:54:58.409308', 'step': 15531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:58.462785', 'step': 15531, 'epoch': 3} {'type': 'loss', 'content': 0.11478598415851593, 'timestamp': '2025-09-10 02:54:58.468540', 'step': 15532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:58.521282', 'step': 15532, 'epoch': 3} {'type': 'loss', 'content': 0.07646553218364716, 'timestamp': '2025-09-10 02:54:58.523457', 'step': 15533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:58.577629', 'step': 15533, 'epoch': 3} {'type': 'loss', 'content': 0.07550456374883652, 'timestamp': '2025-09-10 02:54:58.579562', 'step': 15534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:58.632535', 'step': 15534, 'epoch': 3} {'type': 'loss', 'content': 0.17084145545959473, 'timestamp': '2025-09-10 02:54:58.634487', 'step': 15535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:58.688459', 'step': 15535, 'epoch': 3} {'type': 'loss', 'content': 0.09864892065525055, 'timestamp': '2025-09-10 02:54:58.694220', 'step': 15536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:58.746497', 'step': 15536, 'epoch': 3} {'type': 'loss', 'content': 0.1640409231185913, 'timestamp': '2025-09-10 02:54:58.748436', 'step': 15537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:58.802235', 'step': 15537, 'epoch': 3} {'type': 'loss', 'content': 0.11842826008796692, 'timestamp': '2025-09-10 02:54:58.804178', 'step': 15538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:58.857616', 'step': 15538, 'epoch': 3} {'type': 'loss', 'content': 0.14486326277256012, 'timestamp': '2025-09-10 02:54:58.859697', 'step': 15539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:58.913254', 'step': 15539, 'epoch': 3} {'type': 'loss', 'content': 0.14338116347789764, 'timestamp': '2025-09-10 02:54:58.918851', 'step': 15540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:58.972690', 'step': 15540, 'epoch': 3} {'type': 'loss', 'content': 0.11618821322917938, 'timestamp': '2025-09-10 02:54:58.974663', 'step': 15541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:59.027863', 'step': 15541, 'epoch': 3} {'type': 'loss', 'content': 0.11412722617387772, 'timestamp': '2025-09-10 02:54:59.029862', 'step': 15542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:59.083858', 'step': 15542, 'epoch': 3} {'type': 'loss', 'content': 0.12843778729438782, 'timestamp': '2025-09-10 02:54:59.085858', 'step': 15543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:59.139238', 'step': 15543, 'epoch': 3} {'type': 'loss', 'content': 0.09970588237047195, 'timestamp': '2025-09-10 02:54:59.145083', 'step': 15544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:59.214466', 'step': 15544, 'epoch': 3} {'type': 'loss', 'content': 0.06878673285245895, 'timestamp': '2025-09-10 02:54:59.216642', 'step': 15545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:59.294478', 'step': 15545, 'epoch': 3} {'type': 'loss', 'content': 0.1420375108718872, 'timestamp': '2025-09-10 02:54:59.296707', 'step': 15546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:59.367278', 'step': 15546, 'epoch': 3} {'type': 'loss', 'content': 0.2128554731607437, 'timestamp': '2025-09-10 02:54:59.369363', 'step': 15547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:54:59.429071', 'step': 15547, 'epoch': 3} {'type': 'loss', 'content': 0.11534267663955688, 'timestamp': '2025-09-10 02:54:59.434819', 'step': 15548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:59.487525', 'step': 15548, 'epoch': 3} {'type': 'loss', 'content': 0.14161550998687744, 'timestamp': '2025-09-10 02:54:59.489865', 'step': 15549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:59.543822', 'step': 15549, 'epoch': 3} {'type': 'loss', 'content': 0.04294276982545853, 'timestamp': '2025-09-10 02:54:59.546007', 'step': 15550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:59.600756', 'step': 15550, 'epoch': 3} {'type': 'loss', 'content': 0.07518995553255081, 'timestamp': '2025-09-10 02:54:59.602791', 'step': 15551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:54:59.658408', 'step': 15551, 'epoch': 3} {'type': 'loss', 'content': 0.09080857038497925, 'timestamp': '2025-09-10 02:54:59.664658', 'step': 15552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:59.717346', 'step': 15552, 'epoch': 3} {'type': 'loss', 'content': 0.11283818632364273, 'timestamp': '2025-09-10 02:54:59.719501', 'step': 15553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:59.773384', 'step': 15553, 'epoch': 3} {'type': 'loss', 'content': 0.12106212228536606, 'timestamp': '2025-09-10 02:54:59.775606', 'step': 15554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:59.830212', 'step': 15554, 'epoch': 3} {'type': 'loss', 'content': 0.09712699055671692, 'timestamp': '2025-09-10 02:54:59.832454', 'step': 15555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:54:59.888812', 'step': 15555, 'epoch': 3} {'type': 'loss', 'content': 0.12805484235286713, 'timestamp': '2025-09-10 02:54:59.894745', 'step': 15556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:54:59.947015', 'step': 15556, 'epoch': 3} {'type': 'loss', 'content': 0.08417994529008865, 'timestamp': '2025-09-10 02:54:59.949188', 'step': 15557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:00.002692', 'step': 15557, 'epoch': 3} {'type': 'loss', 'content': 0.11496628075838089, 'timestamp': '2025-09-10 02:55:00.004789', 'step': 15558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:00.058285', 'step': 15558, 'epoch': 3} {'type': 'loss', 'content': 0.13198088109493256, 'timestamp': '2025-09-10 02:55:00.060469', 'step': 15559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:00.114257', 'step': 15559, 'epoch': 3} {'type': 'loss', 'content': 0.09688927978277206, 'timestamp': '2025-09-10 02:55:00.120159', 'step': 15560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:00.173103', 'step': 15560, 'epoch': 3} {'type': 'loss', 'content': 0.06647469848394394, 'timestamp': '2025-09-10 02:55:00.175229', 'step': 15561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:00.228265', 'step': 15561, 'epoch': 3} {'type': 'loss', 'content': 0.042856600135564804, 'timestamp': '2025-09-10 02:55:00.230430', 'step': 15562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:00.283375', 'step': 15562, 'epoch': 3} {'type': 'loss', 'content': 0.11926295608282089, 'timestamp': '2025-09-10 02:55:00.285380', 'step': 15563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:00.338835', 'step': 15563, 'epoch': 3} {'type': 'loss', 'content': 0.09916947036981583, 'timestamp': '2025-09-10 02:55:00.344580', 'step': 15564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:00.396822', 'step': 15564, 'epoch': 3} {'type': 'loss', 'content': 0.1195039376616478, 'timestamp': '2025-09-10 02:55:00.398981', 'step': 15565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:00.452405', 'step': 15565, 'epoch': 3} {'type': 'loss', 'content': 0.04220011830329895, 'timestamp': '2025-09-10 02:55:00.454469', 'step': 15566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:00.508423', 'step': 15566, 'epoch': 3} {'type': 'loss', 'content': 0.13869331777095795, 'timestamp': '2025-09-10 02:55:00.510448', 'step': 15567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:00.569070', 'step': 15567, 'epoch': 3} {'type': 'loss', 'content': 0.14098471403121948, 'timestamp': '2025-09-10 02:55:00.574924', 'step': 15568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:00.629415', 'step': 15568, 'epoch': 3} {'type': 'loss', 'content': 0.10124833881855011, 'timestamp': '2025-09-10 02:55:00.631457', 'step': 15569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:00.685125', 'step': 15569, 'epoch': 3} {'type': 'loss', 'content': 0.09375927597284317, 'timestamp': '2025-09-10 02:55:00.687300', 'step': 15570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:00.743300', 'step': 15570, 'epoch': 3} {'type': 'loss', 'content': 0.15852093696594238, 'timestamp': '2025-09-10 02:55:00.745497', 'step': 15571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:00.799186', 'step': 15571, 'epoch': 3} {'type': 'loss', 'content': 0.03779773414134979, 'timestamp': '2025-09-10 02:55:00.804979', 'step': 15572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:00.858369', 'step': 15572, 'epoch': 3} {'type': 'loss', 'content': 0.1050054132938385, 'timestamp': '2025-09-10 02:55:00.860462', 'step': 15573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:00.913863', 'step': 15573, 'epoch': 3} {'type': 'loss', 'content': 0.1082337498664856, 'timestamp': '2025-09-10 02:55:00.917906', 'step': 15574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:00.973929', 'step': 15574, 'epoch': 3} {'type': 'loss', 'content': 0.1493300348520279, 'timestamp': '2025-09-10 02:55:00.976214', 'step': 15575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:01.029348', 'step': 15575, 'epoch': 3} {'type': 'loss', 'content': 0.08758295327425003, 'timestamp': '2025-09-10 02:55:01.035093', 'step': 15576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:01.087906', 'step': 15576, 'epoch': 3} {'type': 'loss', 'content': 0.08810248225927353, 'timestamp': '2025-09-10 02:55:01.090080', 'step': 15577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:01.143876', 'step': 15577, 'epoch': 3} {'type': 'loss', 'content': 0.06783878058195114, 'timestamp': '2025-09-10 02:55:01.145973', 'step': 15578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:01.199149', 'step': 15578, 'epoch': 3} {'type': 'loss', 'content': 0.0853230282664299, 'timestamp': '2025-09-10 02:55:01.202863', 'step': 15579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:01.261951', 'step': 15579, 'epoch': 3} {'type': 'loss', 'content': 0.09811258316040039, 'timestamp': '2025-09-10 02:55:01.267763', 'step': 15580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:01.320507', 'step': 15580, 'epoch': 3} {'type': 'loss', 'content': 0.15278011560440063, 'timestamp': '2025-09-10 02:55:01.322496', 'step': 15581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:01.384678', 'step': 15581, 'epoch': 3} {'type': 'loss', 'content': 0.09988544136285782, 'timestamp': '2025-09-10 02:55:01.388600', 'step': 15582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:01.443473', 'step': 15582, 'epoch': 3} {'type': 'loss', 'content': 0.1791081577539444, 'timestamp': '2025-09-10 02:55:01.446715', 'step': 15583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:01.502821', 'step': 15583, 'epoch': 3} {'type': 'loss', 'content': 0.11523289233446121, 'timestamp': '2025-09-10 02:55:01.508569', 'step': 15584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:01.561104', 'step': 15584, 'epoch': 3} {'type': 'loss', 'content': 0.08707047998905182, 'timestamp': '2025-09-10 02:55:01.563207', 'step': 15585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:01.616795', 'step': 15585, 'epoch': 3} {'type': 'loss', 'content': 0.08257138729095459, 'timestamp': '2025-09-10 02:55:01.620962', 'step': 15586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:01.676297', 'step': 15586, 'epoch': 3} {'type': 'loss', 'content': 0.19208088517189026, 'timestamp': '2025-09-10 02:55:01.678469', 'step': 15587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:01.731930', 'step': 15587, 'epoch': 3} {'type': 'loss', 'content': 0.13216528296470642, 'timestamp': '2025-09-10 02:55:01.737921', 'step': 15588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:01.793528', 'step': 15588, 'epoch': 3} {'type': 'loss', 'content': 0.23681628704071045, 'timestamp': '2025-09-10 02:55:01.795826', 'step': 15589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:55:01.857619', 'step': 15589, 'epoch': 3} {'type': 'loss', 'content': 0.08482370525598526, 'timestamp': '2025-09-10 02:55:01.859911', 'step': 15590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:01.914249', 'step': 15590, 'epoch': 3} {'type': 'loss', 'content': 0.08788635581731796, 'timestamp': '2025-09-10 02:55:01.916352', 'step': 15591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:01.969308', 'step': 15591, 'epoch': 3} {'type': 'loss', 'content': 0.060738950967788696, 'timestamp': '2025-09-10 02:55:01.974886', 'step': 15592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:02.028302', 'step': 15592, 'epoch': 3} {'type': 'loss', 'content': 0.11791550368070602, 'timestamp': '2025-09-10 02:55:02.030228', 'step': 15593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:02.093686', 'step': 15593, 'epoch': 3} {'type': 'loss', 'content': 0.12277893722057343, 'timestamp': '2025-09-10 02:55:02.095946', 'step': 15594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:02.150962', 'step': 15594, 'epoch': 3} {'type': 'loss', 'content': 0.1392083317041397, 'timestamp': '2025-09-10 02:55:02.153322', 'step': 15595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:02.209063', 'step': 15595, 'epoch': 3} {'type': 'loss', 'content': 0.06456710398197174, 'timestamp': '2025-09-10 02:55:02.215562', 'step': 15596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:02.270910', 'step': 15596, 'epoch': 3} {'type': 'loss', 'content': 0.1240895688533783, 'timestamp': '2025-09-10 02:55:02.273166', 'step': 15597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:02.327669', 'step': 15597, 'epoch': 3} {'type': 'loss', 'content': 0.08149396628141403, 'timestamp': '2025-09-10 02:55:02.330051', 'step': 15598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:02.384296', 'step': 15598, 'epoch': 3} {'type': 'loss', 'content': 0.10901661962270737, 'timestamp': '2025-09-10 02:55:02.387089', 'step': 15599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:02.440716', 'step': 15599, 'epoch': 3} {'type': 'loss', 'content': 0.0861620381474495, 'timestamp': '2025-09-10 02:55:02.446860', 'step': 15600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:02.500702', 'step': 15600, 'epoch': 3} {'type': 'loss', 'content': 0.0700504332780838, 'timestamp': '2025-09-10 02:55:02.503008', 'step': 15601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:02.556463', 'step': 15601, 'epoch': 3} {'type': 'loss', 'content': 0.13619722425937653, 'timestamp': '2025-09-10 02:55:02.559896', 'step': 15602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:02.614477', 'step': 15602, 'epoch': 3} {'type': 'loss', 'content': 0.11989420652389526, 'timestamp': '2025-09-10 02:55:02.616768', 'step': 15603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:02.671556', 'step': 15603, 'epoch': 3} {'type': 'loss', 'content': 0.053812555968761444, 'timestamp': '2025-09-10 02:55:02.678028', 'step': 15604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:02.732259', 'step': 15604, 'epoch': 3} {'type': 'loss', 'content': 0.14053095877170563, 'timestamp': '2025-09-10 02:55:02.734554', 'step': 15605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:02.788991', 'step': 15605, 'epoch': 3} {'type': 'loss', 'content': 0.14096570014953613, 'timestamp': '2025-09-10 02:55:02.791299', 'step': 15606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:02.845203', 'step': 15606, 'epoch': 3} {'type': 'loss', 'content': 0.030872328206896782, 'timestamp': '2025-09-10 02:55:02.847572', 'step': 15607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:02.903362', 'step': 15607, 'epoch': 3} {'type': 'loss', 'content': 0.07648276537656784, 'timestamp': '2025-09-10 02:55:02.909816', 'step': 15608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:02.963367', 'step': 15608, 'epoch': 3} {'type': 'loss', 'content': 0.10158831626176834, 'timestamp': '2025-09-10 02:55:02.965975', 'step': 15609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:03.019711', 'step': 15609, 'epoch': 3} {'type': 'loss', 'content': 0.16047751903533936, 'timestamp': '2025-09-10 02:55:03.022047', 'step': 15610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:03.075876', 'step': 15610, 'epoch': 3} {'type': 'loss', 'content': 0.0781259536743164, 'timestamp': '2025-09-10 02:55:03.078132', 'step': 15611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:03.132231', 'step': 15611, 'epoch': 3} {'type': 'loss', 'content': 0.13064217567443848, 'timestamp': '2025-09-10 02:55:03.138366', 'step': 15612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:03.191190', 'step': 15612, 'epoch': 3} {'type': 'loss', 'content': 0.10869844257831573, 'timestamp': '2025-09-10 02:55:03.193545', 'step': 15613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:03.247700', 'step': 15613, 'epoch': 3} {'type': 'loss', 'content': 0.22629958391189575, 'timestamp': '2025-09-10 02:55:03.249913', 'step': 15614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:03.306877', 'step': 15614, 'epoch': 3} {'type': 'loss', 'content': 0.16818812489509583, 'timestamp': '2025-09-10 02:55:03.309186', 'step': 15615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:03.370494', 'step': 15615, 'epoch': 3} {'type': 'loss', 'content': 0.09734690189361572, 'timestamp': '2025-09-10 02:55:03.376594', 'step': 15616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:03.429998', 'step': 15616, 'epoch': 3} {'type': 'loss', 'content': 0.11864534020423889, 'timestamp': '2025-09-10 02:55:03.432623', 'step': 15617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:03.485969', 'step': 15617, 'epoch': 3} {'type': 'loss', 'content': 0.10484576970338821, 'timestamp': '2025-09-10 02:55:03.488414', 'step': 15618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:03.544807', 'step': 15618, 'epoch': 3} {'type': 'loss', 'content': 0.1290685385465622, 'timestamp': '2025-09-10 02:55:03.547074', 'step': 15619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:03.604726', 'step': 15619, 'epoch': 3} {'type': 'loss', 'content': 0.05467113479971886, 'timestamp': '2025-09-10 02:55:03.610841', 'step': 15620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:03.668418', 'step': 15620, 'epoch': 3} {'type': 'loss', 'content': 0.06122788414359093, 'timestamp': '2025-09-10 02:55:03.670698', 'step': 15621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:03.724943', 'step': 15621, 'epoch': 3} {'type': 'loss', 'content': 0.016847902908921242, 'timestamp': '2025-09-10 02:55:03.727209', 'step': 15622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:03.783705', 'step': 15622, 'epoch': 3} {'type': 'loss', 'content': 0.1100212037563324, 'timestamp': '2025-09-10 02:55:03.785987', 'step': 15623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:03.840618', 'step': 15623, 'epoch': 3} {'type': 'loss', 'content': 0.20844610035419464, 'timestamp': '2025-09-10 02:55:03.846794', 'step': 15624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:03.900310', 'step': 15624, 'epoch': 3} {'type': 'loss', 'content': 0.10273587703704834, 'timestamp': '2025-09-10 02:55:03.902697', 'step': 15625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:03.958031', 'step': 15625, 'epoch': 3} {'type': 'loss', 'content': 0.10814940184354782, 'timestamp': '2025-09-10 02:55:03.960303', 'step': 15626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:04.015306', 'step': 15626, 'epoch': 3} {'type': 'loss', 'content': 0.21938824653625488, 'timestamp': '2025-09-10 02:55:04.017607', 'step': 15627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:04.072827', 'step': 15627, 'epoch': 3} {'type': 'loss', 'content': 0.09408723562955856, 'timestamp': '2025-09-10 02:55:04.079408', 'step': 15628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:04.133262', 'step': 15628, 'epoch': 3} {'type': 'loss', 'content': 0.07622092217206955, 'timestamp': '2025-09-10 02:55:04.135605', 'step': 15629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:04.189704', 'step': 15629, 'epoch': 3} {'type': 'loss', 'content': 0.08736354857683182, 'timestamp': '2025-09-10 02:55:04.192004', 'step': 15630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:04.245817', 'step': 15630, 'epoch': 3} {'type': 'loss', 'content': 0.1138027235865593, 'timestamp': '2025-09-10 02:55:04.248143', 'step': 15631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:04.303426', 'step': 15631, 'epoch': 3} {'type': 'loss', 'content': 0.11007271707057953, 'timestamp': '2025-09-10 02:55:04.309812', 'step': 15632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:04.364116', 'step': 15632, 'epoch': 3} {'type': 'loss', 'content': 0.07380852848291397, 'timestamp': '2025-09-10 02:55:04.366134', 'step': 15633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:04.422124', 'step': 15633, 'epoch': 3} {'type': 'loss', 'content': 0.1242678239941597, 'timestamp': '2025-09-10 02:55:04.424615', 'step': 15634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:04.478771', 'step': 15634, 'epoch': 3} {'type': 'loss', 'content': 0.06274229288101196, 'timestamp': '2025-09-10 02:55:04.481034', 'step': 15635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:04.535764', 'step': 15635, 'epoch': 3} {'type': 'loss', 'content': 0.07036322355270386, 'timestamp': '2025-09-10 02:55:04.541995', 'step': 15636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:04.595210', 'step': 15636, 'epoch': 3} {'type': 'loss', 'content': 0.03995645418763161, 'timestamp': '2025-09-10 02:55:04.597471', 'step': 15637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:04.651481', 'step': 15637, 'epoch': 3} {'type': 'loss', 'content': 0.12489942461252213, 'timestamp': '2025-09-10 02:55:04.653758', 'step': 15638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:04.707828', 'step': 15638, 'epoch': 3} {'type': 'loss', 'content': 0.14050035178661346, 'timestamp': '2025-09-10 02:55:04.710117', 'step': 15639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:04.765704', 'step': 15639, 'epoch': 3} {'type': 'loss', 'content': 0.03855020925402641, 'timestamp': '2025-09-10 02:55:04.771802', 'step': 15640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:04.825116', 'step': 15640, 'epoch': 3} {'type': 'loss', 'content': 0.05887436121702194, 'timestamp': '2025-09-10 02:55:04.827453', 'step': 15641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:04.881529', 'step': 15641, 'epoch': 3} {'type': 'loss', 'content': 0.14228326082229614, 'timestamp': '2025-09-10 02:55:04.883743', 'step': 15642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:04.937710', 'step': 15642, 'epoch': 3} {'type': 'loss', 'content': 0.09313727170228958, 'timestamp': '2025-09-10 02:55:04.940023', 'step': 15643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:04.993301', 'step': 15643, 'epoch': 3} {'type': 'loss', 'content': 0.04007177799940109, 'timestamp': '2025-09-10 02:55:04.999258', 'step': 15644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:05.053272', 'step': 15644, 'epoch': 3} {'type': 'loss', 'content': 0.11234241724014282, 'timestamp': '2025-09-10 02:55:05.055648', 'step': 15645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:05.109130', 'step': 15645, 'epoch': 3} {'type': 'loss', 'content': 0.09551647305488586, 'timestamp': '2025-09-10 02:55:05.111505', 'step': 15646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:05.166080', 'step': 15646, 'epoch': 3} {'type': 'loss', 'content': 0.0900348648428917, 'timestamp': '2025-09-10 02:55:05.168420', 'step': 15647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:05.223722', 'step': 15647, 'epoch': 3} {'type': 'loss', 'content': 0.058970410376787186, 'timestamp': '2025-09-10 02:55:05.229822', 'step': 15648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:05.283160', 'step': 15648, 'epoch': 3} {'type': 'loss', 'content': 0.1144537627696991, 'timestamp': '2025-09-10 02:55:05.285378', 'step': 15649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:05.339076', 'step': 15649, 'epoch': 3} {'type': 'loss', 'content': 0.13540613651275635, 'timestamp': '2025-09-10 02:55:05.341283', 'step': 15650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:05.396614', 'step': 15650, 'epoch': 3} {'type': 'loss', 'content': 0.10489340871572495, 'timestamp': '2025-09-10 02:55:05.398800', 'step': 15651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:05.452508', 'step': 15651, 'epoch': 3} {'type': 'loss', 'content': 0.07273461669683456, 'timestamp': '2025-09-10 02:55:05.458588', 'step': 15652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:05.511904', 'step': 15652, 'epoch': 3} {'type': 'loss', 'content': 0.10922244191169739, 'timestamp': '2025-09-10 02:55:05.514278', 'step': 15653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:05.568362', 'step': 15653, 'epoch': 3} {'type': 'loss', 'content': 0.12058017402887344, 'timestamp': '2025-09-10 02:55:05.570546', 'step': 15654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:05.624486', 'step': 15654, 'epoch': 3} {'type': 'loss', 'content': 0.16361157596111298, 'timestamp': '2025-09-10 02:55:05.626839', 'step': 15655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:05.681268', 'step': 15655, 'epoch': 3} {'type': 'loss', 'content': 0.04274404048919678, 'timestamp': '2025-09-10 02:55:05.687274', 'step': 15656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:05.740676', 'step': 15656, 'epoch': 3} {'type': 'loss', 'content': 0.09297984838485718, 'timestamp': '2025-09-10 02:55:05.742888', 'step': 15657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:05.795872', 'step': 15657, 'epoch': 3} {'type': 'loss', 'content': 0.09318725019693375, 'timestamp': '2025-09-10 02:55:05.798177', 'step': 15658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:05.851892', 'step': 15658, 'epoch': 3} {'type': 'loss', 'content': 0.0928671807050705, 'timestamp': '2025-09-10 02:55:05.854217', 'step': 15659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:05.908349', 'step': 15659, 'epoch': 3} {'type': 'loss', 'content': 0.026888100430369377, 'timestamp': '2025-09-10 02:55:05.914281', 'step': 15660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:05.968158', 'step': 15660, 'epoch': 3} {'type': 'loss', 'content': 0.08822888880968094, 'timestamp': '2025-09-10 02:55:05.970666', 'step': 15661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:06.024691', 'step': 15661, 'epoch': 3} {'type': 'loss', 'content': 0.03478551656007767, 'timestamp': '2025-09-10 02:55:06.027000', 'step': 15662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:06.081171', 'step': 15662, 'epoch': 3} {'type': 'loss', 'content': 0.20820267498493195, 'timestamp': '2025-09-10 02:55:06.083470', 'step': 15663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:06.137204', 'step': 15663, 'epoch': 3} {'type': 'loss', 'content': 0.1569664478302002, 'timestamp': '2025-09-10 02:55:06.143254', 'step': 15664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:06.196945', 'step': 15664, 'epoch': 3} {'type': 'loss', 'content': 0.0484231561422348, 'timestamp': '2025-09-10 02:55:06.199143', 'step': 15665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:06.252926', 'step': 15665, 'epoch': 3} {'type': 'loss', 'content': 0.09382887929677963, 'timestamp': '2025-09-10 02:55:06.255298', 'step': 15666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:06.309897', 'step': 15666, 'epoch': 3} {'type': 'loss', 'content': 0.13496863842010498, 'timestamp': '2025-09-10 02:55:06.312211', 'step': 15667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:06.365685', 'step': 15667, 'epoch': 3} {'type': 'loss', 'content': 0.11422602087259293, 'timestamp': '2025-09-10 02:55:06.371673', 'step': 15668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:06.425153', 'step': 15668, 'epoch': 3} {'type': 'loss', 'content': 0.11143362522125244, 'timestamp': '2025-09-10 02:55:06.427576', 'step': 15669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:06.481511', 'step': 15669, 'epoch': 3} {'type': 'loss', 'content': 0.12269671261310577, 'timestamp': '2025-09-10 02:55:06.483973', 'step': 15670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:06.538485', 'step': 15670, 'epoch': 3} {'type': 'loss', 'content': 0.142045795917511, 'timestamp': '2025-09-10 02:55:06.540840', 'step': 15671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:06.594968', 'step': 15671, 'epoch': 3} {'type': 'loss', 'content': 0.15925802290439606, 'timestamp': '2025-09-10 02:55:06.601018', 'step': 15672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:06.654820', 'step': 15672, 'epoch': 3} {'type': 'loss', 'content': 0.12936268746852875, 'timestamp': '2025-09-10 02:55:06.656962', 'step': 15673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:06.711399', 'step': 15673, 'epoch': 3} {'type': 'loss', 'content': 0.14701169729232788, 'timestamp': '2025-09-10 02:55:06.713871', 'step': 15674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:06.768199', 'step': 15674, 'epoch': 3} {'type': 'loss', 'content': 0.07615497708320618, 'timestamp': '2025-09-10 02:55:06.770776', 'step': 15675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:06.824507', 'step': 15675, 'epoch': 3} {'type': 'loss', 'content': 0.11120869964361191, 'timestamp': '2025-09-10 02:55:06.830627', 'step': 15676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:06.884469', 'step': 15676, 'epoch': 3} {'type': 'loss', 'content': 0.1476900279521942, 'timestamp': '2025-09-10 02:55:06.886852', 'step': 15677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:06.941456', 'step': 15677, 'epoch': 3} {'type': 'loss', 'content': 0.24502447247505188, 'timestamp': '2025-09-10 02:55:06.943814', 'step': 15678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:06.998243', 'step': 15678, 'epoch': 3} {'type': 'loss', 'content': 0.048635657876729965, 'timestamp': '2025-09-10 02:55:07.000716', 'step': 15679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:07.055571', 'step': 15679, 'epoch': 3} {'type': 'loss', 'content': 0.03587872162461281, 'timestamp': '2025-09-10 02:55:07.061598', 'step': 15680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:07.114664', 'step': 15680, 'epoch': 3} {'type': 'loss', 'content': 0.05805595591664314, 'timestamp': '2025-09-10 02:55:07.116849', 'step': 15681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:07.171802', 'step': 15681, 'epoch': 3} {'type': 'loss', 'content': 0.08050908148288727, 'timestamp': '2025-09-10 02:55:07.174054', 'step': 15682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:07.228864', 'step': 15682, 'epoch': 3} {'type': 'loss', 'content': 0.13884294033050537, 'timestamp': '2025-09-10 02:55:07.231208', 'step': 15683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:07.285153', 'step': 15683, 'epoch': 3} {'type': 'loss', 'content': 0.057112760841846466, 'timestamp': '2025-09-10 02:55:07.291222', 'step': 15684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:07.344413', 'step': 15684, 'epoch': 3} {'type': 'loss', 'content': 0.21458368003368378, 'timestamp': '2025-09-10 02:55:07.346735', 'step': 15685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:07.400370', 'step': 15685, 'epoch': 3} {'type': 'loss', 'content': 0.1553013026714325, 'timestamp': '2025-09-10 02:55:07.402846', 'step': 15686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:07.457937', 'step': 15686, 'epoch': 3} {'type': 'loss', 'content': 0.037553705275058746, 'timestamp': '2025-09-10 02:55:07.460300', 'step': 15687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:07.514800', 'step': 15687, 'epoch': 3} {'type': 'loss', 'content': 0.12628906965255737, 'timestamp': '2025-09-10 02:55:07.521258', 'step': 15688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:07.574527', 'step': 15688, 'epoch': 3} {'type': 'loss', 'content': 0.12218012660741806, 'timestamp': '2025-09-10 02:55:07.576893', 'step': 15689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:07.631245', 'step': 15689, 'epoch': 3} {'type': 'loss', 'content': 0.0857459157705307, 'timestamp': '2025-09-10 02:55:07.633647', 'step': 15690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:07.687401', 'step': 15690, 'epoch': 3} {'type': 'loss', 'content': 0.09681088477373123, 'timestamp': '2025-09-10 02:55:07.689769', 'step': 15691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:07.746303', 'step': 15691, 'epoch': 3} {'type': 'loss', 'content': 0.03144903853535652, 'timestamp': '2025-09-10 02:55:07.752546', 'step': 15692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:07.813100', 'step': 15692, 'epoch': 3} {'type': 'loss', 'content': 0.09642430394887924, 'timestamp': '2025-09-10 02:55:07.815305', 'step': 15693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:07.871962', 'step': 15693, 'epoch': 3} {'type': 'loss', 'content': 0.08987696468830109, 'timestamp': '2025-09-10 02:55:07.874130', 'step': 15694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:07.928510', 'step': 15694, 'epoch': 3} {'type': 'loss', 'content': 0.05531459301710129, 'timestamp': '2025-09-10 02:55:07.936734', 'step': 15695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:07.999906', 'step': 15695, 'epoch': 3} {'type': 'loss', 'content': 0.07347679138183594, 'timestamp': '2025-09-10 02:55:08.011429', 'step': 15696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:08.070186', 'step': 15696, 'epoch': 3} {'type': 'loss', 'content': 0.17935991287231445, 'timestamp': '2025-09-10 02:55:08.080608', 'step': 15697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:08.135649', 'step': 15697, 'epoch': 3} {'type': 'loss', 'content': 0.0429624505341053, 'timestamp': '2025-09-10 02:55:08.138010', 'step': 15698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:08.193635', 'step': 15698, 'epoch': 3} {'type': 'loss', 'content': 0.12294874340295792, 'timestamp': '2025-09-10 02:55:08.195867', 'step': 15699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:08.251321', 'step': 15699, 'epoch': 3} {'type': 'loss', 'content': 0.12313897162675858, 'timestamp': '2025-09-10 02:55:08.257834', 'step': 15700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:08.312315', 'step': 15700, 'epoch': 3} {'type': 'loss', 'content': 0.05879534035921097, 'timestamp': '2025-09-10 02:55:08.314489', 'step': 15701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:08.369129', 'step': 15701, 'epoch': 3} {'type': 'loss', 'content': 0.11004601418972015, 'timestamp': '2025-09-10 02:55:08.371471', 'step': 15702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:08.432391', 'step': 15702, 'epoch': 3} {'type': 'loss', 'content': 0.07970289885997772, 'timestamp': '2025-09-10 02:55:08.434578', 'step': 15703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:08.489565', 'step': 15703, 'epoch': 3} {'type': 'loss', 'content': 0.0853772833943367, 'timestamp': '2025-09-10 02:55:08.495712', 'step': 15704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:08.548774', 'step': 15704, 'epoch': 3} {'type': 'loss', 'content': 0.07928143441677094, 'timestamp': '2025-09-10 02:55:08.551112', 'step': 15705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:08.604717', 'step': 15705, 'epoch': 3} {'type': 'loss', 'content': 0.17019625008106232, 'timestamp': '2025-09-10 02:55:08.606987', 'step': 15706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:08.660198', 'step': 15706, 'epoch': 3} {'type': 'loss', 'content': 0.1092221811413765, 'timestamp': '2025-09-10 02:55:08.662467', 'step': 15707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:08.715589', 'step': 15707, 'epoch': 3} {'type': 'loss', 'content': 0.06015641987323761, 'timestamp': '2025-09-10 02:55:08.721712', 'step': 15708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:08.775467', 'step': 15708, 'epoch': 3} {'type': 'loss', 'content': 0.05724141746759415, 'timestamp': '2025-09-10 02:55:08.777703', 'step': 15709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:08.832190', 'step': 15709, 'epoch': 3} {'type': 'loss', 'content': 0.14702653884887695, 'timestamp': '2025-09-10 02:55:08.834556', 'step': 15710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:08.888518', 'step': 15710, 'epoch': 3} {'type': 'loss', 'content': 0.13008642196655273, 'timestamp': '2025-09-10 02:55:08.890749', 'step': 15711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:08.944336', 'step': 15711, 'epoch': 3} {'type': 'loss', 'content': 0.09795529395341873, 'timestamp': '2025-09-10 02:55:08.950558', 'step': 15712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:09.003359', 'step': 15712, 'epoch': 3} {'type': 'loss', 'content': 0.10380574315786362, 'timestamp': '2025-09-10 02:55:09.005609', 'step': 15713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:09.059201', 'step': 15713, 'epoch': 3} {'type': 'loss', 'content': 0.1156640499830246, 'timestamp': '2025-09-10 02:55:09.061637', 'step': 15714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:09.116211', 'step': 15714, 'epoch': 3} {'type': 'loss', 'content': 0.08111809194087982, 'timestamp': '2025-09-10 02:55:09.118436', 'step': 15715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:09.172106', 'step': 15715, 'epoch': 3} {'type': 'loss', 'content': 0.09158789366483688, 'timestamp': '2025-09-10 02:55:09.178235', 'step': 15716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:09.230868', 'step': 15716, 'epoch': 3} {'type': 'loss', 'content': 0.048516396433115005, 'timestamp': '2025-09-10 02:55:09.233216', 'step': 15717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:09.286351', 'step': 15717, 'epoch': 3} {'type': 'loss', 'content': 0.06060310825705528, 'timestamp': '2025-09-10 02:55:09.288691', 'step': 15718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:09.342732', 'step': 15718, 'epoch': 3} {'type': 'loss', 'content': 0.12080390751361847, 'timestamp': '2025-09-10 02:55:09.345087', 'step': 15719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:09.399186', 'step': 15719, 'epoch': 3} {'type': 'loss', 'content': 0.0721282884478569, 'timestamp': '2025-09-10 02:55:09.405520', 'step': 15720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:09.460585', 'step': 15720, 'epoch': 3} {'type': 'loss', 'content': 0.10373825579881668, 'timestamp': '2025-09-10 02:55:09.462898', 'step': 15721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:09.518888', 'step': 15721, 'epoch': 3} {'type': 'loss', 'content': 0.10819337517023087, 'timestamp': '2025-09-10 02:55:09.521189', 'step': 15722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:09.578554', 'step': 15722, 'epoch': 3} {'type': 'loss', 'content': 0.18046467006206512, 'timestamp': '2025-09-10 02:55:09.580921', 'step': 15723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:09.637689', 'step': 15723, 'epoch': 3} {'type': 'loss', 'content': 0.048448752611875534, 'timestamp': '2025-09-10 02:55:09.644517', 'step': 15724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:09.700090', 'step': 15724, 'epoch': 3} {'type': 'loss', 'content': 0.02609427645802498, 'timestamp': '2025-09-10 02:55:09.702438', 'step': 15725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:09.757939', 'step': 15725, 'epoch': 3} {'type': 'loss', 'content': 0.11436665058135986, 'timestamp': '2025-09-10 02:55:09.760102', 'step': 15726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:09.817702', 'step': 15726, 'epoch': 3} {'type': 'loss', 'content': 0.09536348283290863, 'timestamp': '2025-09-10 02:55:09.819889', 'step': 15727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:09.876254', 'step': 15727, 'epoch': 3} {'type': 'loss', 'content': 0.13189411163330078, 'timestamp': '2025-09-10 02:55:09.883106', 'step': 15728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:09.939593', 'step': 15728, 'epoch': 3} {'type': 'loss', 'content': 0.153738871216774, 'timestamp': '2025-09-10 02:55:09.941897', 'step': 15729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:09.999055', 'step': 15729, 'epoch': 3} {'type': 'loss', 'content': 0.10805460065603256, 'timestamp': '2025-09-10 02:55:10.001148', 'step': 15730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:10.055562', 'step': 15730, 'epoch': 3} {'type': 'loss', 'content': 0.06744595617055893, 'timestamp': '2025-09-10 02:55:10.059122', 'step': 15731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:10.114580', 'step': 15731, 'epoch': 3} {'type': 'loss', 'content': 0.16672742366790771, 'timestamp': '2025-09-10 02:55:10.121136', 'step': 15732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:10.174919', 'step': 15732, 'epoch': 3} {'type': 'loss', 'content': 0.04925704747438431, 'timestamp': '2025-09-10 02:55:10.177206', 'step': 15733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:10.230969', 'step': 15733, 'epoch': 3} {'type': 'loss', 'content': 0.08405409008264542, 'timestamp': '2025-09-10 02:55:10.233316', 'step': 15734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:55:10.289518', 'step': 15734, 'epoch': 3} {'type': 'loss', 'content': 0.06624408066272736, 'timestamp': '2025-09-10 02:55:10.291919', 'step': 15735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:10.352550', 'step': 15735, 'epoch': 3} {'type': 'loss', 'content': 0.07974562048912048, 'timestamp': '2025-09-10 02:55:10.361414', 'step': 15736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:10.422396', 'step': 15736, 'epoch': 3} {'type': 'loss', 'content': 0.14998310804367065, 'timestamp': '2025-09-10 02:55:10.424835', 'step': 15737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:10.485664', 'step': 15737, 'epoch': 3} {'type': 'loss', 'content': 0.09478039294481277, 'timestamp': '2025-09-10 02:55:10.488114', 'step': 15738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:10.548708', 'step': 15738, 'epoch': 3} {'type': 'loss', 'content': 0.12902145087718964, 'timestamp': '2025-09-10 02:55:10.551188', 'step': 15739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:10.611584', 'step': 15739, 'epoch': 3} {'type': 'loss', 'content': 0.14316204190254211, 'timestamp': '2025-09-10 02:55:10.618424', 'step': 15740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:10.673669', 'step': 15740, 'epoch': 3} {'type': 'loss', 'content': 0.13123062252998352, 'timestamp': '2025-09-10 02:55:10.676168', 'step': 15741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:10.730560', 'step': 15741, 'epoch': 3} {'type': 'loss', 'content': 0.07306763529777527, 'timestamp': '2025-09-10 02:55:10.732740', 'step': 15742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:10.786760', 'step': 15742, 'epoch': 3} {'type': 'loss', 'content': 0.06218931823968887, 'timestamp': '2025-09-10 02:55:10.789015', 'step': 15743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:10.842769', 'step': 15743, 'epoch': 3} {'type': 'loss', 'content': 0.0597800575196743, 'timestamp': '2025-09-10 02:55:10.848736', 'step': 15744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:10.902680', 'step': 15744, 'epoch': 3} {'type': 'loss', 'content': 0.1599918156862259, 'timestamp': '2025-09-10 02:55:10.905012', 'step': 15745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:10.959153', 'step': 15745, 'epoch': 3} {'type': 'loss', 'content': 0.08433627337217331, 'timestamp': '2025-09-10 02:55:10.961552', 'step': 15746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:11.015587', 'step': 15746, 'epoch': 3} {'type': 'loss', 'content': 0.21977604925632477, 'timestamp': '2025-09-10 02:55:11.018055', 'step': 15747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:11.071991', 'step': 15747, 'epoch': 3} {'type': 'loss', 'content': 0.1096901074051857, 'timestamp': '2025-09-10 02:55:11.078016', 'step': 15748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:11.131319', 'step': 15748, 'epoch': 3} {'type': 'loss', 'content': 0.08349553495645523, 'timestamp': '2025-09-10 02:55:11.133674', 'step': 15749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:11.187559', 'step': 15749, 'epoch': 3} {'type': 'loss', 'content': 0.07338865846395493, 'timestamp': '2025-09-10 02:55:11.189845', 'step': 15750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:11.243938', 'step': 15750, 'epoch': 3} {'type': 'loss', 'content': 0.12682722508907318, 'timestamp': '2025-09-10 02:55:11.246150', 'step': 15751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:11.299209', 'step': 15751, 'epoch': 3} {'type': 'loss', 'content': 0.10228271782398224, 'timestamp': '2025-09-10 02:55:11.305065', 'step': 15752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:11.358175', 'step': 15752, 'epoch': 3} {'type': 'loss', 'content': 0.055691562592983246, 'timestamp': '2025-09-10 02:55:11.360459', 'step': 15753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:11.414246', 'step': 15753, 'epoch': 3} {'type': 'loss', 'content': 0.06690891087055206, 'timestamp': '2025-09-10 02:55:11.416501', 'step': 15754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:11.473740', 'step': 15754, 'epoch': 3} {'type': 'loss', 'content': 0.057793475687503815, 'timestamp': '2025-09-10 02:55:11.476140', 'step': 15755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:11.533726', 'step': 15755, 'epoch': 3} {'type': 'loss', 'content': 0.037027306854724884, 'timestamp': '2025-09-10 02:55:11.540297', 'step': 15756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:11.595891', 'step': 15756, 'epoch': 3} {'type': 'loss', 'content': 0.1691914051771164, 'timestamp': '2025-09-10 02:55:11.598122', 'step': 15757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:11.652934', 'step': 15757, 'epoch': 3} {'type': 'loss', 'content': 0.09813030064105988, 'timestamp': '2025-09-10 02:55:11.655340', 'step': 15758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:11.709631', 'step': 15758, 'epoch': 3} {'type': 'loss', 'content': 0.20136840641498566, 'timestamp': '2025-09-10 02:55:11.711852', 'step': 15759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:11.766474', 'step': 15759, 'epoch': 3} {'type': 'loss', 'content': 0.10979007929563522, 'timestamp': '2025-09-10 02:55:11.772934', 'step': 15760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:11.827990', 'step': 15760, 'epoch': 3} {'type': 'loss', 'content': 0.13634154200553894, 'timestamp': '2025-09-10 02:55:11.830460', 'step': 15761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:11.885689', 'step': 15761, 'epoch': 3} {'type': 'loss', 'content': 0.03733089193701744, 'timestamp': '2025-09-10 02:55:11.888024', 'step': 15762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:11.943048', 'step': 15762, 'epoch': 3} {'type': 'loss', 'content': 0.15025576949119568, 'timestamp': '2025-09-10 02:55:11.945391', 'step': 15763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:12.000416', 'step': 15763, 'epoch': 3} {'type': 'loss', 'content': 0.11401010304689407, 'timestamp': '2025-09-10 02:55:12.006787', 'step': 15764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:12.060336', 'step': 15764, 'epoch': 3} {'type': 'loss', 'content': 0.12369591742753983, 'timestamp': '2025-09-10 02:55:12.062442', 'step': 15765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:12.116840', 'step': 15765, 'epoch': 3} {'type': 'loss', 'content': 0.11632320284843445, 'timestamp': '2025-09-10 02:55:12.119104', 'step': 15766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:12.173146', 'step': 15766, 'epoch': 3} {'type': 'loss', 'content': 0.07883453369140625, 'timestamp': '2025-09-10 02:55:12.175537', 'step': 15767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:12.230079', 'step': 15767, 'epoch': 3} {'type': 'loss', 'content': 0.09951549023389816, 'timestamp': '2025-09-10 02:55:12.236186', 'step': 15768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:12.289799', 'step': 15768, 'epoch': 3} {'type': 'loss', 'content': 0.09586642682552338, 'timestamp': '2025-09-10 02:55:12.291958', 'step': 15769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:12.345554', 'step': 15769, 'epoch': 3} {'type': 'loss', 'content': 0.0811026394367218, 'timestamp': '2025-09-10 02:55:12.347657', 'step': 15770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:12.401904', 'step': 15770, 'epoch': 3} {'type': 'loss', 'content': 0.11480777710676193, 'timestamp': '2025-09-10 02:55:12.404280', 'step': 15771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:12.458704', 'step': 15771, 'epoch': 3} {'type': 'loss', 'content': 0.07413538545370102, 'timestamp': '2025-09-10 02:55:12.464908', 'step': 15772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:12.519252', 'step': 15772, 'epoch': 3} {'type': 'loss', 'content': 0.00737781310454011, 'timestamp': '2025-09-10 02:55:12.521410', 'step': 15773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:12.576966', 'step': 15773, 'epoch': 3} {'type': 'loss', 'content': 0.11017531156539917, 'timestamp': '2025-09-10 02:55:12.579153', 'step': 15774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:12.633898', 'step': 15774, 'epoch': 3} {'type': 'loss', 'content': 0.07718916982412338, 'timestamp': '2025-09-10 02:55:12.636296', 'step': 15775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:12.690021', 'step': 15775, 'epoch': 3} {'type': 'loss', 'content': 0.1013021394610405, 'timestamp': '2025-09-10 02:55:12.696539', 'step': 15776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:12.749654', 'step': 15776, 'epoch': 3} {'type': 'loss', 'content': 0.08000659197568893, 'timestamp': '2025-09-10 02:55:12.751735', 'step': 15777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:12.806058', 'step': 15777, 'epoch': 3} {'type': 'loss', 'content': 0.14145603775978088, 'timestamp': '2025-09-10 02:55:12.808315', 'step': 15778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:12.862710', 'step': 15778, 'epoch': 3} {'type': 'loss', 'content': 0.032660696655511856, 'timestamp': '2025-09-10 02:55:12.864928', 'step': 15779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:12.918845', 'step': 15779, 'epoch': 3} {'type': 'loss', 'content': 0.11210542917251587, 'timestamp': '2025-09-10 02:55:12.925048', 'step': 15780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:12.977629', 'step': 15780, 'epoch': 3} {'type': 'loss', 'content': 0.04851949214935303, 'timestamp': '2025-09-10 02:55:12.979677', 'step': 15781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:13.032599', 'step': 15781, 'epoch': 3} {'type': 'loss', 'content': 0.0702952966094017, 'timestamp': '2025-09-10 02:55:13.034753', 'step': 15782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:13.088101', 'step': 15782, 'epoch': 3} {'type': 'loss', 'content': 0.21908040344715118, 'timestamp': '2025-09-10 02:55:13.090380', 'step': 15783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:13.143438', 'step': 15783, 'epoch': 3} {'type': 'loss', 'content': 0.09988827258348465, 'timestamp': '2025-09-10 02:55:13.149496', 'step': 15784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:13.202595', 'step': 15784, 'epoch': 3} {'type': 'loss', 'content': 0.1490054428577423, 'timestamp': '2025-09-10 02:55:13.206293', 'step': 15785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:13.259865', 'step': 15785, 'epoch': 3} {'type': 'loss', 'content': 0.14146839082241058, 'timestamp': '2025-09-10 02:55:13.262206', 'step': 15786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:13.315671', 'step': 15786, 'epoch': 3} {'type': 'loss', 'content': 0.08518914878368378, 'timestamp': '2025-09-10 02:55:13.317783', 'step': 15787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:13.370707', 'step': 15787, 'epoch': 3} {'type': 'loss', 'content': 0.16314813494682312, 'timestamp': '2025-09-10 02:55:13.376414', 'step': 15788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:13.429275', 'step': 15788, 'epoch': 3} {'type': 'loss', 'content': 0.1052057296037674, 'timestamp': '2025-09-10 02:55:13.431664', 'step': 15789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:13.485099', 'step': 15789, 'epoch': 3} {'type': 'loss', 'content': 0.14590175449848175, 'timestamp': '2025-09-10 02:55:13.487411', 'step': 15790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:13.541098', 'step': 15790, 'epoch': 3} {'type': 'loss', 'content': 0.03742848336696625, 'timestamp': '2025-09-10 02:55:13.543094', 'step': 15791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:13.596300', 'step': 15791, 'epoch': 3} {'type': 'loss', 'content': 0.10437016189098358, 'timestamp': '2025-09-10 02:55:13.602263', 'step': 15792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:13.656269', 'step': 15792, 'epoch': 3} {'type': 'loss', 'content': 0.08089442551136017, 'timestamp': '2025-09-10 02:55:13.658222', 'step': 15793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:13.711090', 'step': 15793, 'epoch': 3} {'type': 'loss', 'content': 0.20229946076869965, 'timestamp': '2025-09-10 02:55:13.713460', 'step': 15794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:13.767620', 'step': 15794, 'epoch': 3} {'type': 'loss', 'content': 0.07498861104249954, 'timestamp': '2025-09-10 02:55:13.769791', 'step': 15795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:13.822888', 'step': 15795, 'epoch': 3} {'type': 'loss', 'content': 0.13432453572750092, 'timestamp': '2025-09-10 02:55:13.828655', 'step': 15796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:13.881166', 'step': 15796, 'epoch': 3} {'type': 'loss', 'content': 0.09757060557603836, 'timestamp': '2025-09-10 02:55:13.883403', 'step': 15797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:13.936605', 'step': 15797, 'epoch': 3} {'type': 'loss', 'content': 0.13595695793628693, 'timestamp': '2025-09-10 02:55:13.938767', 'step': 15798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:13.992315', 'step': 15798, 'epoch': 3} {'type': 'loss', 'content': 0.09088224917650223, 'timestamp': '2025-09-10 02:55:13.994507', 'step': 15799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:14.048156', 'step': 15799, 'epoch': 3} {'type': 'loss', 'content': 0.060755882412195206, 'timestamp': '2025-09-10 02:55:14.053896', 'step': 15800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:14.105961', 'step': 15800, 'epoch': 3} {'type': 'loss', 'content': 0.04921937361359596, 'timestamp': '2025-09-10 02:55:14.107960', 'step': 15801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:14.161628', 'step': 15801, 'epoch': 3} {'type': 'loss', 'content': 0.1253536343574524, 'timestamp': '2025-09-10 02:55:14.163649', 'step': 15802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:14.217187', 'step': 15802, 'epoch': 3} {'type': 'loss', 'content': 0.11126649379730225, 'timestamp': '2025-09-10 02:55:14.219486', 'step': 15803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:14.271872', 'step': 15803, 'epoch': 3} {'type': 'loss', 'content': 0.13259601593017578, 'timestamp': '2025-09-10 02:55:14.277921', 'step': 15804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:14.331105', 'step': 15804, 'epoch': 3} {'type': 'loss', 'content': 0.08259709179401398, 'timestamp': '2025-09-10 02:55:14.333477', 'step': 15805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:14.386567', 'step': 15805, 'epoch': 3} {'type': 'loss', 'content': 0.225145161151886, 'timestamp': '2025-09-10 02:55:14.388602', 'step': 15806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:14.441873', 'step': 15806, 'epoch': 3} {'type': 'loss', 'content': 0.13655076920986176, 'timestamp': '2025-09-10 02:55:14.444042', 'step': 15807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:14.498099', 'step': 15807, 'epoch': 3} {'type': 'loss', 'content': 0.15598075091838837, 'timestamp': '2025-09-10 02:55:14.503748', 'step': 15808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:14.556188', 'step': 15808, 'epoch': 3} {'type': 'loss', 'content': 0.21940220892429352, 'timestamp': '2025-09-10 02:55:14.558192', 'step': 15809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:14.610926', 'step': 15809, 'epoch': 3} {'type': 'loss', 'content': 0.04215702787041664, 'timestamp': '2025-09-10 02:55:14.612948', 'step': 15810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:14.667028', 'step': 15810, 'epoch': 3} {'type': 'loss', 'content': 0.12226194888353348, 'timestamp': '2025-09-10 02:55:14.669251', 'step': 15811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:14.723706', 'step': 15811, 'epoch': 3} {'type': 'loss', 'content': 0.05899064242839813, 'timestamp': '2025-09-10 02:55:14.729826', 'step': 15812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:14.783085', 'step': 15812, 'epoch': 3} {'type': 'loss', 'content': 0.1175764724612236, 'timestamp': '2025-09-10 02:55:14.785317', 'step': 15813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:14.839375', 'step': 15813, 'epoch': 3} {'type': 'loss', 'content': 0.1008145660161972, 'timestamp': '2025-09-10 02:55:14.842297', 'step': 15814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:14.895736', 'step': 15814, 'epoch': 3} {'type': 'loss', 'content': 0.08324667811393738, 'timestamp': '2025-09-10 02:55:14.897968', 'step': 15815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:14.953052', 'step': 15815, 'epoch': 3} {'type': 'loss', 'content': 0.12687240540981293, 'timestamp': '2025-09-10 02:55:14.958894', 'step': 15816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:15.013325', 'step': 15816, 'epoch': 3} {'type': 'loss', 'content': 0.10855971276760101, 'timestamp': '2025-09-10 02:55:15.017072', 'step': 15817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:15.071279', 'step': 15817, 'epoch': 3} {'type': 'loss', 'content': 0.06691916286945343, 'timestamp': '2025-09-10 02:55:15.073632', 'step': 15818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:15.126834', 'step': 15818, 'epoch': 3} {'type': 'loss', 'content': 0.1297219842672348, 'timestamp': '2025-09-10 02:55:15.129149', 'step': 15819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:15.182762', 'step': 15819, 'epoch': 3} {'type': 'loss', 'content': 0.20422662794589996, 'timestamp': '2025-09-10 02:55:15.188588', 'step': 15820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:15.242364', 'step': 15820, 'epoch': 3} {'type': 'loss', 'content': 0.06982026994228363, 'timestamp': '2025-09-10 02:55:15.244646', 'step': 15821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:15.298272', 'step': 15821, 'epoch': 3} {'type': 'loss', 'content': 0.09548287093639374, 'timestamp': '2025-09-10 02:55:15.300541', 'step': 15822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:15.354499', 'step': 15822, 'epoch': 3} {'type': 'loss', 'content': 0.05117641016840935, 'timestamp': '2025-09-10 02:55:15.356692', 'step': 15823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:15.410614', 'step': 15823, 'epoch': 3} {'type': 'loss', 'content': 0.08058483153581619, 'timestamp': '2025-09-10 02:55:15.416464', 'step': 15824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:15.470067', 'step': 15824, 'epoch': 3} {'type': 'loss', 'content': 0.13604286313056946, 'timestamp': '2025-09-10 02:55:15.472268', 'step': 15825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:15.526377', 'step': 15825, 'epoch': 3} {'type': 'loss', 'content': 0.14989732205867767, 'timestamp': '2025-09-10 02:55:15.528630', 'step': 15826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:15.582545', 'step': 15826, 'epoch': 3} {'type': 'loss', 'content': 0.09610486030578613, 'timestamp': '2025-09-10 02:55:15.584769', 'step': 15827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:15.638374', 'step': 15827, 'epoch': 3} {'type': 'loss', 'content': 0.07892414182424545, 'timestamp': '2025-09-10 02:55:15.644132', 'step': 15828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:15.696682', 'step': 15828, 'epoch': 3} {'type': 'loss', 'content': 0.11686097085475922, 'timestamp': '2025-09-10 02:55:15.698682', 'step': 15829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:15.751973', 'step': 15829, 'epoch': 3} {'type': 'loss', 'content': 0.10443732887506485, 'timestamp': '2025-09-10 02:55:15.754074', 'step': 15830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:15.809226', 'step': 15830, 'epoch': 3} {'type': 'loss', 'content': 0.12954293191432953, 'timestamp': '2025-09-10 02:55:15.811464', 'step': 15831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:15.865337', 'step': 15831, 'epoch': 3} {'type': 'loss', 'content': 0.08297255635261536, 'timestamp': '2025-09-10 02:55:15.871423', 'step': 15832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:15.924004', 'step': 15832, 'epoch': 3} {'type': 'loss', 'content': 0.11083243787288666, 'timestamp': '2025-09-10 02:55:15.926562', 'step': 15833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:15.979918', 'step': 15833, 'epoch': 3} {'type': 'loss', 'content': 0.05773388594388962, 'timestamp': '2025-09-10 02:55:15.982152', 'step': 15834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:16.035774', 'step': 15834, 'epoch': 3} {'type': 'loss', 'content': 0.2352546751499176, 'timestamp': '2025-09-10 02:55:16.037978', 'step': 15835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:16.091144', 'step': 15835, 'epoch': 3} {'type': 'loss', 'content': 0.09243172407150269, 'timestamp': '2025-09-10 02:55:16.098462', 'step': 15836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:16.151272', 'step': 15836, 'epoch': 3} {'type': 'loss', 'content': 0.1635996401309967, 'timestamp': '2025-09-10 02:55:16.153586', 'step': 15837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:16.206982', 'step': 15837, 'epoch': 3} {'type': 'loss', 'content': 0.12034861743450165, 'timestamp': '2025-09-10 02:55:16.209400', 'step': 15838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:16.262731', 'step': 15838, 'epoch': 3} {'type': 'loss', 'content': 0.17718181014060974, 'timestamp': '2025-09-10 02:55:16.265011', 'step': 15839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:16.319382', 'step': 15839, 'epoch': 3} {'type': 'loss', 'content': 0.09953922033309937, 'timestamp': '2025-09-10 02:55:16.325484', 'step': 15840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:16.378971', 'step': 15840, 'epoch': 3} {'type': 'loss', 'content': 0.1445791870355606, 'timestamp': '2025-09-10 02:55:16.381284', 'step': 15841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:16.434902', 'step': 15841, 'epoch': 3} {'type': 'loss', 'content': 0.12180715054273605, 'timestamp': '2025-09-10 02:55:16.437102', 'step': 15842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:16.490848', 'step': 15842, 'epoch': 3} {'type': 'loss', 'content': 0.08492908626794815, 'timestamp': '2025-09-10 02:55:16.493188', 'step': 15843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:16.546349', 'step': 15843, 'epoch': 3} {'type': 'loss', 'content': 0.15807297825813293, 'timestamp': '2025-09-10 02:55:16.552183', 'step': 15844, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:55:29.229236', 'step': 15844, 'epoch': 3} {'type': 'pplx', 'content': 10341.392792043087, 'timestamp': '2025-09-10 02:55:29.232562', 'step': 15844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:29.287034', 'step': 15844, 'epoch': 3} {'type': 'loss', 'content': 0.08067815005779266, 'timestamp': '2025-09-10 02:55:29.288922', 'step': 15845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:29.343730', 'step': 15845, 'epoch': 3} {'type': 'loss', 'content': 0.22636882960796356, 'timestamp': '2025-09-10 02:55:29.345882', 'step': 15846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:29.399489', 'step': 15846, 'epoch': 3} {'type': 'loss', 'content': 0.13206149637699127, 'timestamp': '2025-09-10 02:55:29.401639', 'step': 15847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:29.454951', 'step': 15847, 'epoch': 3} {'type': 'loss', 'content': 0.07718463242053986, 'timestamp': '2025-09-10 02:55:29.461067', 'step': 15848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:29.516464', 'step': 15848, 'epoch': 3} {'type': 'loss', 'content': 0.1595745086669922, 'timestamp': '2025-09-10 02:55:29.518476', 'step': 15849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:29.571959', 'step': 15849, 'epoch': 3} {'type': 'loss', 'content': 0.05508076399564743, 'timestamp': '2025-09-10 02:55:29.574191', 'step': 15850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:29.627175', 'step': 15850, 'epoch': 3} {'type': 'loss', 'content': 0.12483510375022888, 'timestamp': '2025-09-10 02:55:29.629421', 'step': 15851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:29.683462', 'step': 15851, 'epoch': 3} {'type': 'loss', 'content': 0.17945928871631622, 'timestamp': '2025-09-10 02:55:29.689729', 'step': 15852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:29.745074', 'step': 15852, 'epoch': 3} {'type': 'loss', 'content': 0.10004528611898422, 'timestamp': '2025-09-10 02:55:29.747337', 'step': 15853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:29.801305', 'step': 15853, 'epoch': 3} {'type': 'loss', 'content': 0.042530421167612076, 'timestamp': '2025-09-10 02:55:29.803435', 'step': 15854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:29.857545', 'step': 15854, 'epoch': 3} {'type': 'loss', 'content': 0.14996786415576935, 'timestamp': '2025-09-10 02:55:29.859843', 'step': 15855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:29.914195', 'step': 15855, 'epoch': 3} {'type': 'loss', 'content': 0.07957305759191513, 'timestamp': '2025-09-10 02:55:29.920316', 'step': 15856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:29.973282', 'step': 15856, 'epoch': 3} {'type': 'loss', 'content': 0.11298943310976028, 'timestamp': '2025-09-10 02:55:29.975432', 'step': 15857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:30.028705', 'step': 15857, 'epoch': 3} {'type': 'loss', 'content': 0.1147618368268013, 'timestamp': '2025-09-10 02:55:30.030881', 'step': 15858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:55:30.084493', 'step': 15858, 'epoch': 3} {'type': 'loss', 'content': 0.11450234800577164, 'timestamp': '2025-09-10 02:55:30.086679', 'step': 15859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:30.141100', 'step': 15859, 'epoch': 3} {'type': 'loss', 'content': 0.12791195511817932, 'timestamp': '2025-09-10 02:55:30.148134', 'step': 15860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:30.206344', 'step': 15860, 'epoch': 3} {'type': 'loss', 'content': 0.10252217948436737, 'timestamp': '2025-09-10 02:55:30.208606', 'step': 15861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:30.266567', 'step': 15861, 'epoch': 3} {'type': 'loss', 'content': 0.11166132986545563, 'timestamp': '2025-09-10 02:55:30.269562', 'step': 15862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:30.327712', 'step': 15862, 'epoch': 3} {'type': 'loss', 'content': 0.1444670855998993, 'timestamp': '2025-09-10 02:55:30.329862', 'step': 15863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:30.383481', 'step': 15863, 'epoch': 3} {'type': 'loss', 'content': 0.0660405382514, 'timestamp': '2025-09-10 02:55:30.389573', 'step': 15864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:30.444195', 'step': 15864, 'epoch': 3} {'type': 'loss', 'content': 0.06927661597728729, 'timestamp': '2025-09-10 02:55:30.446323', 'step': 15865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:30.498949', 'step': 15865, 'epoch': 3} {'type': 'loss', 'content': 0.05624496564269066, 'timestamp': '2025-09-10 02:55:30.501163', 'step': 15866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:30.555116', 'step': 15866, 'epoch': 3} {'type': 'loss', 'content': 0.12814195454120636, 'timestamp': '2025-09-10 02:55:30.557315', 'step': 15867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:30.610751', 'step': 15867, 'epoch': 3} {'type': 'loss', 'content': 0.15221361815929413, 'timestamp': '2025-09-10 02:55:30.616780', 'step': 15868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:30.671430', 'step': 15868, 'epoch': 3} {'type': 'loss', 'content': 0.11890211701393127, 'timestamp': '2025-09-10 02:55:30.673701', 'step': 15869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:30.727415', 'step': 15869, 'epoch': 3} {'type': 'loss', 'content': 0.09135523438453674, 'timestamp': '2025-09-10 02:55:30.729700', 'step': 15870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:30.785232', 'step': 15870, 'epoch': 3} {'type': 'loss', 'content': 0.11505121737718582, 'timestamp': '2025-09-10 02:55:30.787571', 'step': 15871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:30.840958', 'step': 15871, 'epoch': 3} {'type': 'loss', 'content': 0.09228906035423279, 'timestamp': '2025-09-10 02:55:30.846938', 'step': 15872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:30.900934', 'step': 15872, 'epoch': 3} {'type': 'loss', 'content': 0.1150292158126831, 'timestamp': '2025-09-10 02:55:30.903085', 'step': 15873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:30.956467', 'step': 15873, 'epoch': 3} {'type': 'loss', 'content': 0.03151673451066017, 'timestamp': '2025-09-10 02:55:30.958688', 'step': 15874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:31.013168', 'step': 15874, 'epoch': 3} {'type': 'loss', 'content': 0.12440165877342224, 'timestamp': '2025-09-10 02:55:31.015546', 'step': 15875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:31.069319', 'step': 15875, 'epoch': 3} {'type': 'loss', 'content': 0.07973513007164001, 'timestamp': '2025-09-10 02:55:31.075346', 'step': 15876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:31.128702', 'step': 15876, 'epoch': 3} {'type': 'loss', 'content': 0.10951102524995804, 'timestamp': '2025-09-10 02:55:31.130794', 'step': 15877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:31.184490', 'step': 15877, 'epoch': 3} {'type': 'loss', 'content': 0.09319449961185455, 'timestamp': '2025-09-10 02:55:31.186632', 'step': 15878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:31.240070', 'step': 15878, 'epoch': 3} {'type': 'loss', 'content': 0.1763869673013687, 'timestamp': '2025-09-10 02:55:31.242108', 'step': 15879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:31.295687', 'step': 15879, 'epoch': 3} {'type': 'loss', 'content': 0.1013004407286644, 'timestamp': '2025-09-10 02:55:31.302015', 'step': 15880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:31.355764', 'step': 15880, 'epoch': 3} {'type': 'loss', 'content': 0.09744520485401154, 'timestamp': '2025-09-10 02:55:31.357868', 'step': 15881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:31.412160', 'step': 15881, 'epoch': 3} {'type': 'loss', 'content': 0.08491990715265274, 'timestamp': '2025-09-10 02:55:31.414442', 'step': 15882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:31.469834', 'step': 15882, 'epoch': 3} {'type': 'loss', 'content': 0.1270986646413803, 'timestamp': '2025-09-10 02:55:31.471751', 'step': 15883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:31.527687', 'step': 15883, 'epoch': 3} {'type': 'loss', 'content': 0.07593308389186859, 'timestamp': '2025-09-10 02:55:31.533483', 'step': 15884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:31.586346', 'step': 15884, 'epoch': 3} {'type': 'loss', 'content': 0.10895417630672455, 'timestamp': '2025-09-10 02:55:31.588491', 'step': 15885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:31.641782', 'step': 15885, 'epoch': 3} {'type': 'loss', 'content': 0.043397579342126846, 'timestamp': '2025-09-10 02:55:31.643995', 'step': 15886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:31.698340', 'step': 15886, 'epoch': 3} {'type': 'loss', 'content': 0.07186909019947052, 'timestamp': '2025-09-10 02:55:31.700591', 'step': 15887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:31.755661', 'step': 15887, 'epoch': 3} {'type': 'loss', 'content': 0.10975557565689087, 'timestamp': '2025-09-10 02:55:31.761953', 'step': 15888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:31.818022', 'step': 15888, 'epoch': 3} {'type': 'loss', 'content': 0.07378045469522476, 'timestamp': '2025-09-10 02:55:31.820367', 'step': 15889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:31.875944', 'step': 15889, 'epoch': 3} {'type': 'loss', 'content': 0.10673460364341736, 'timestamp': '2025-09-10 02:55:31.878001', 'step': 15890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:31.932989', 'step': 15890, 'epoch': 3} {'type': 'loss', 'content': 0.08512597531080246, 'timestamp': '2025-09-10 02:55:31.935064', 'step': 15891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:31.989406', 'step': 15891, 'epoch': 3} {'type': 'loss', 'content': 0.21666856110095978, 'timestamp': '2025-09-10 02:55:31.995483', 'step': 15892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:32.049655', 'step': 15892, 'epoch': 3} {'type': 'loss', 'content': 0.06038305163383484, 'timestamp': '2025-09-10 02:55:32.051713', 'step': 15893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:32.104814', 'step': 15893, 'epoch': 3} {'type': 'loss', 'content': 0.04492476209998131, 'timestamp': '2025-09-10 02:55:32.107031', 'step': 15894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:32.161065', 'step': 15894, 'epoch': 3} {'type': 'loss', 'content': 0.11668188124895096, 'timestamp': '2025-09-10 02:55:32.163099', 'step': 15895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:32.216525', 'step': 15895, 'epoch': 3} {'type': 'loss', 'content': 0.09994838386774063, 'timestamp': '2025-09-10 02:55:32.222532', 'step': 15896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:32.276363', 'step': 15896, 'epoch': 3} {'type': 'loss', 'content': 0.0776476114988327, 'timestamp': '2025-09-10 02:55:32.278348', 'step': 15897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:32.332401', 'step': 15897, 'epoch': 3} {'type': 'loss', 'content': 0.15136857330799103, 'timestamp': '2025-09-10 02:55:32.334735', 'step': 15898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:32.389094', 'step': 15898, 'epoch': 3} {'type': 'loss', 'content': 0.09245619177818298, 'timestamp': '2025-09-10 02:55:32.391204', 'step': 15899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:32.445626', 'step': 15899, 'epoch': 3} {'type': 'loss', 'content': 0.08203128725290298, 'timestamp': '2025-09-10 02:55:32.451545', 'step': 15900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:32.504522', 'step': 15900, 'epoch': 3} {'type': 'loss', 'content': 0.07680152356624603, 'timestamp': '2025-09-10 02:55:32.506535', 'step': 15901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:32.559850', 'step': 15901, 'epoch': 3} {'type': 'loss', 'content': 0.09575401246547699, 'timestamp': '2025-09-10 02:55:32.562006', 'step': 15902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:32.617448', 'step': 15902, 'epoch': 3} {'type': 'loss', 'content': 0.038058530539274216, 'timestamp': '2025-09-10 02:55:32.619399', 'step': 15903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:32.672402', 'step': 15903, 'epoch': 3} {'type': 'loss', 'content': 0.10726747661828995, 'timestamp': '2025-09-10 02:55:32.678098', 'step': 15904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:32.731293', 'step': 15904, 'epoch': 3} {'type': 'loss', 'content': 0.11144380271434784, 'timestamp': '2025-09-10 02:55:32.733402', 'step': 15905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:32.787342', 'step': 15905, 'epoch': 3} {'type': 'loss', 'content': 0.057755351066589355, 'timestamp': '2025-09-10 02:55:32.789511', 'step': 15906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:32.844182', 'step': 15906, 'epoch': 3} {'type': 'loss', 'content': 0.07323915511369705, 'timestamp': '2025-09-10 02:55:32.846348', 'step': 15907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:32.902061', 'step': 15907, 'epoch': 3} {'type': 'loss', 'content': 0.08734928071498871, 'timestamp': '2025-09-10 02:55:32.908588', 'step': 15908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:32.963094', 'step': 15908, 'epoch': 3} {'type': 'loss', 'content': 0.1320994645357132, 'timestamp': '2025-09-10 02:55:32.965346', 'step': 15909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:33.019570', 'step': 15909, 'epoch': 3} {'type': 'loss', 'content': 0.10867287218570709, 'timestamp': '2025-09-10 02:55:33.021628', 'step': 15910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:33.075786', 'step': 15910, 'epoch': 3} {'type': 'loss', 'content': 0.07577566802501678, 'timestamp': '2025-09-10 02:55:33.077813', 'step': 15911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:33.131338', 'step': 15911, 'epoch': 3} {'type': 'loss', 'content': 0.15826229751110077, 'timestamp': '2025-09-10 02:55:33.137598', 'step': 15912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:33.190759', 'step': 15912, 'epoch': 3} {'type': 'loss', 'content': 0.07163297384977341, 'timestamp': '2025-09-10 02:55:33.192960', 'step': 15913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:33.246390', 'step': 15913, 'epoch': 3} {'type': 'loss', 'content': 0.11057402193546295, 'timestamp': '2025-09-10 02:55:33.248823', 'step': 15914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:33.305258', 'step': 15914, 'epoch': 3} {'type': 'loss', 'content': 0.07230789959430695, 'timestamp': '2025-09-10 02:55:33.315197', 'step': 15915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:33.379135', 'step': 15915, 'epoch': 3} {'type': 'loss', 'content': 0.13242658972740173, 'timestamp': '2025-09-10 02:55:33.386868', 'step': 15916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:33.447349', 'step': 15916, 'epoch': 3} {'type': 'loss', 'content': 0.1062854453921318, 'timestamp': '2025-09-10 02:55:33.450648', 'step': 15917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:33.504470', 'step': 15917, 'epoch': 3} {'type': 'loss', 'content': 0.14980345964431763, 'timestamp': '2025-09-10 02:55:33.507751', 'step': 15918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:33.603075', 'step': 15918, 'epoch': 3} {'type': 'loss', 'content': 0.09458983689546585, 'timestamp': '2025-09-10 02:55:33.605302', 'step': 15919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:33.664869', 'step': 15919, 'epoch': 3} {'type': 'loss', 'content': 0.11393173038959503, 'timestamp': '2025-09-10 02:55:33.671094', 'step': 15920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:33.768605', 'step': 15920, 'epoch': 3} {'type': 'loss', 'content': 0.071254201233387, 'timestamp': '2025-09-10 02:55:33.771030', 'step': 15921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:33.826449', 'step': 15921, 'epoch': 3} {'type': 'loss', 'content': 0.11737404763698578, 'timestamp': '2025-09-10 02:55:33.830941', 'step': 15922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:33.894699', 'step': 15922, 'epoch': 3} {'type': 'loss', 'content': 0.18676155805587769, 'timestamp': '2025-09-10 02:55:33.896714', 'step': 15923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:33.979108', 'step': 15923, 'epoch': 3} {'type': 'loss', 'content': 0.06955993920564651, 'timestamp': '2025-09-10 02:55:33.985907', 'step': 15924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:34.086809', 'step': 15924, 'epoch': 3} {'type': 'loss', 'content': 0.06760261952877045, 'timestamp': '2025-09-10 02:55:34.088993', 'step': 15925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:34.181877', 'step': 15925, 'epoch': 3} {'type': 'loss', 'content': 0.09485790878534317, 'timestamp': '2025-09-10 02:55:34.183865', 'step': 15926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:34.270584', 'step': 15926, 'epoch': 3} {'type': 'loss', 'content': 0.10676681995391846, 'timestamp': '2025-09-10 02:55:34.272879', 'step': 15927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:34.343102', 'step': 15927, 'epoch': 3} {'type': 'loss', 'content': 0.09752435982227325, 'timestamp': '2025-09-10 02:55:34.349527', 'step': 15928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:34.403665', 'step': 15928, 'epoch': 3} {'type': 'loss', 'content': 0.11765813827514648, 'timestamp': '2025-09-10 02:55:34.405933', 'step': 15929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:34.460355', 'step': 15929, 'epoch': 3} {'type': 'loss', 'content': 0.0656290352344513, 'timestamp': '2025-09-10 02:55:34.462490', 'step': 15930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:34.518367', 'step': 15930, 'epoch': 3} {'type': 'loss', 'content': 0.20080648362636566, 'timestamp': '2025-09-10 02:55:34.520567', 'step': 15931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:34.575868', 'step': 15931, 'epoch': 3} {'type': 'loss', 'content': 0.0973149836063385, 'timestamp': '2025-09-10 02:55:34.582175', 'step': 15932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:34.635925', 'step': 15932, 'epoch': 3} {'type': 'loss', 'content': 0.12929648160934448, 'timestamp': '2025-09-10 02:55:34.638125', 'step': 15933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:34.693253', 'step': 15933, 'epoch': 3} {'type': 'loss', 'content': 0.0786311998963356, 'timestamp': '2025-09-10 02:55:34.695674', 'step': 15934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:34.749468', 'step': 15934, 'epoch': 3} {'type': 'loss', 'content': 0.04816329851746559, 'timestamp': '2025-09-10 02:55:34.751691', 'step': 15935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:34.809928', 'step': 15935, 'epoch': 3} {'type': 'loss', 'content': 0.12392780929803848, 'timestamp': '2025-09-10 02:55:34.816104', 'step': 15936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:34.871545', 'step': 15936, 'epoch': 3} {'type': 'loss', 'content': 0.060585372149944305, 'timestamp': '2025-09-10 02:55:34.873749', 'step': 15937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:34.930125', 'step': 15937, 'epoch': 3} {'type': 'loss', 'content': 0.0641854852437973, 'timestamp': '2025-09-10 02:55:34.932332', 'step': 15938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:34.986805', 'step': 15938, 'epoch': 3} {'type': 'loss', 'content': 0.05666034296154976, 'timestamp': '2025-09-10 02:55:34.989005', 'step': 15939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:35.044185', 'step': 15939, 'epoch': 3} {'type': 'loss', 'content': 0.13696515560150146, 'timestamp': '2025-09-10 02:55:35.050498', 'step': 15940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:35.105374', 'step': 15940, 'epoch': 3} {'type': 'loss', 'content': 0.08094898611307144, 'timestamp': '2025-09-10 02:55:35.107687', 'step': 15941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:35.161639', 'step': 15941, 'epoch': 3} {'type': 'loss', 'content': 0.22345876693725586, 'timestamp': '2025-09-10 02:55:35.163903', 'step': 15942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:35.217072', 'step': 15942, 'epoch': 3} {'type': 'loss', 'content': 0.13314616680145264, 'timestamp': '2025-09-10 02:55:35.219205', 'step': 15943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:35.273219', 'step': 15943, 'epoch': 3} {'type': 'loss', 'content': 0.07736664265394211, 'timestamp': '2025-09-10 02:55:35.279316', 'step': 15944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:35.333238', 'step': 15944, 'epoch': 3} {'type': 'loss', 'content': 0.074319988489151, 'timestamp': '2025-09-10 02:55:35.335499', 'step': 15945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:35.389739', 'step': 15945, 'epoch': 3} {'type': 'loss', 'content': 0.11450808495283127, 'timestamp': '2025-09-10 02:55:35.391959', 'step': 15946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:35.446352', 'step': 15946, 'epoch': 3} {'type': 'loss', 'content': 0.05806640163064003, 'timestamp': '2025-09-10 02:55:35.448621', 'step': 15947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:35.502808', 'step': 15947, 'epoch': 3} {'type': 'loss', 'content': 0.17329466342926025, 'timestamp': '2025-09-10 02:55:35.509072', 'step': 15948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:35.564182', 'step': 15948, 'epoch': 3} {'type': 'loss', 'content': 0.14831791818141937, 'timestamp': '2025-09-10 02:55:35.566472', 'step': 15949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:35.622963', 'step': 15949, 'epoch': 3} {'type': 'loss', 'content': 0.0419890433549881, 'timestamp': '2025-09-10 02:55:35.625173', 'step': 15950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:35.681116', 'step': 15950, 'epoch': 3} {'type': 'loss', 'content': 0.2157641053199768, 'timestamp': '2025-09-10 02:55:35.683584', 'step': 15951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:35.739520', 'step': 15951, 'epoch': 3} {'type': 'loss', 'content': 0.10974778234958649, 'timestamp': '2025-09-10 02:55:35.746198', 'step': 15952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:35.801368', 'step': 15952, 'epoch': 3} {'type': 'loss', 'content': 0.15582939982414246, 'timestamp': '2025-09-10 02:55:35.803666', 'step': 15953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:35.859667', 'step': 15953, 'epoch': 3} {'type': 'loss', 'content': 0.07000879943370819, 'timestamp': '2025-09-10 02:55:35.861867', 'step': 15954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:35.917737', 'step': 15954, 'epoch': 3} {'type': 'loss', 'content': 0.19469062983989716, 'timestamp': '2025-09-10 02:55:35.920135', 'step': 15955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:35.975089', 'step': 15955, 'epoch': 3} {'type': 'loss', 'content': 0.07420029491186142, 'timestamp': '2025-09-10 02:55:35.981371', 'step': 15956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:36.036718', 'step': 15956, 'epoch': 3} {'type': 'loss', 'content': 0.12206028401851654, 'timestamp': '2025-09-10 02:55:36.039118', 'step': 15957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:36.093161', 'step': 15957, 'epoch': 3} {'type': 'loss', 'content': 0.062467899173498154, 'timestamp': '2025-09-10 02:55:36.095305', 'step': 15958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:36.149451', 'step': 15958, 'epoch': 3} {'type': 'loss', 'content': 0.09247720241546631, 'timestamp': '2025-09-10 02:55:36.151609', 'step': 15959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:55:36.205429', 'step': 15959, 'epoch': 3} {'type': 'loss', 'content': 0.11035701632499695, 'timestamp': '2025-09-10 02:55:36.211725', 'step': 15960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:36.266432', 'step': 15960, 'epoch': 3} {'type': 'loss', 'content': 0.16183806955814362, 'timestamp': '2025-09-10 02:55:36.268718', 'step': 15961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:36.324413', 'step': 15961, 'epoch': 3} {'type': 'loss', 'content': 0.16116081178188324, 'timestamp': '2025-09-10 02:55:36.326725', 'step': 15962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:36.382214', 'step': 15962, 'epoch': 3} {'type': 'loss', 'content': 0.0806712806224823, 'timestamp': '2025-09-10 02:55:36.384654', 'step': 15963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:36.439696', 'step': 15963, 'epoch': 3} {'type': 'loss', 'content': 0.15225209295749664, 'timestamp': '2025-09-10 02:55:36.446063', 'step': 15964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:36.500053', 'step': 15964, 'epoch': 3} {'type': 'loss', 'content': 0.10878453403711319, 'timestamp': '2025-09-10 02:55:36.502460', 'step': 15965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:36.558466', 'step': 15965, 'epoch': 3} {'type': 'loss', 'content': 0.06525881588459015, 'timestamp': '2025-09-10 02:55:36.560719', 'step': 15966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:36.615630', 'step': 15966, 'epoch': 3} {'type': 'loss', 'content': 0.1511077880859375, 'timestamp': '2025-09-10 02:55:36.617892', 'step': 15967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:36.671832', 'step': 15967, 'epoch': 3} {'type': 'loss', 'content': 0.08635912835597992, 'timestamp': '2025-09-10 02:55:36.678159', 'step': 15968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:36.733946', 'step': 15968, 'epoch': 3} {'type': 'loss', 'content': 0.08085709810256958, 'timestamp': '2025-09-10 02:55:36.736157', 'step': 15969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:36.790329', 'step': 15969, 'epoch': 3} {'type': 'loss', 'content': 0.050169799476861954, 'timestamp': '2025-09-10 02:55:36.792696', 'step': 15970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:36.847756', 'step': 15970, 'epoch': 3} {'type': 'loss', 'content': 0.08791537582874298, 'timestamp': '2025-09-10 02:55:36.850211', 'step': 15971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:36.904397', 'step': 15971, 'epoch': 3} {'type': 'loss', 'content': 0.07702526450157166, 'timestamp': '2025-09-10 02:55:36.910713', 'step': 15972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:36.976670', 'step': 15972, 'epoch': 3} {'type': 'loss', 'content': 0.09538755565881729, 'timestamp': '2025-09-10 02:55:36.979051', 'step': 15973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:37.032515', 'step': 15973, 'epoch': 3} {'type': 'loss', 'content': 0.07288072258234024, 'timestamp': '2025-09-10 02:55:37.034755', 'step': 15974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:37.088182', 'step': 15974, 'epoch': 3} {'type': 'loss', 'content': 0.0962468832731247, 'timestamp': '2025-09-10 02:55:37.090431', 'step': 15975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:55:37.144730', 'step': 15975, 'epoch': 3} {'type': 'loss', 'content': 0.1425262689590454, 'timestamp': '2025-09-10 02:55:37.150803', 'step': 15976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:37.203876', 'step': 15976, 'epoch': 3} {'type': 'loss', 'content': 0.20130516588687897, 'timestamp': '2025-09-10 02:55:37.205954', 'step': 15977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:37.259017', 'step': 15977, 'epoch': 3} {'type': 'loss', 'content': 0.04505962133407593, 'timestamp': '2025-09-10 02:55:37.261142', 'step': 15978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:37.314922', 'step': 15978, 'epoch': 3} {'type': 'loss', 'content': 0.08387143909931183, 'timestamp': '2025-09-10 02:55:37.317187', 'step': 15979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:37.370574', 'step': 15979, 'epoch': 3} {'type': 'loss', 'content': 0.1395849585533142, 'timestamp': '2025-09-10 02:55:37.376774', 'step': 15980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:37.430379', 'step': 15980, 'epoch': 3} {'type': 'loss', 'content': 0.2299319952726364, 'timestamp': '2025-09-10 02:55:37.433857', 'step': 15981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:37.487667', 'step': 15981, 'epoch': 3} {'type': 'loss', 'content': 0.07728272676467896, 'timestamp': '2025-09-10 02:55:37.490016', 'step': 15982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:37.544343', 'step': 15982, 'epoch': 3} {'type': 'loss', 'content': 0.11185761541128159, 'timestamp': '2025-09-10 02:55:37.546583', 'step': 15983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:37.600295', 'step': 15983, 'epoch': 3} {'type': 'loss', 'content': 0.08517319709062576, 'timestamp': '2025-09-10 02:55:37.606553', 'step': 15984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:37.659937', 'step': 15984, 'epoch': 3} {'type': 'loss', 'content': 0.07656831294298172, 'timestamp': '2025-09-10 02:55:37.662219', 'step': 15985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:37.716031', 'step': 15985, 'epoch': 3} {'type': 'loss', 'content': 0.13710056245326996, 'timestamp': '2025-09-10 02:55:37.718330', 'step': 15986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:37.771625', 'step': 15986, 'epoch': 3} {'type': 'loss', 'content': 0.09910066425800323, 'timestamp': '2025-09-10 02:55:37.773611', 'step': 15987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:37.826909', 'step': 15987, 'epoch': 3} {'type': 'loss', 'content': 0.10212354362010956, 'timestamp': '2025-09-10 02:55:37.832932', 'step': 15988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:37.885408', 'step': 15988, 'epoch': 3} {'type': 'loss', 'content': 0.12731710076332092, 'timestamp': '2025-09-10 02:55:37.887580', 'step': 15989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:37.940924', 'step': 15989, 'epoch': 3} {'type': 'loss', 'content': 0.14466378092765808, 'timestamp': '2025-09-10 02:55:37.943043', 'step': 15990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:37.997167', 'step': 15990, 'epoch': 3} {'type': 'loss', 'content': 0.08800546824932098, 'timestamp': '2025-09-10 02:55:37.999581', 'step': 15991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:38.054420', 'step': 15991, 'epoch': 3} {'type': 'loss', 'content': 0.06852241605520248, 'timestamp': '2025-09-10 02:55:38.060375', 'step': 15992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:38.113226', 'step': 15992, 'epoch': 3} {'type': 'loss', 'content': 0.10896612703800201, 'timestamp': '2025-09-10 02:55:38.115353', 'step': 15993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:38.168520', 'step': 15993, 'epoch': 3} {'type': 'loss', 'content': 0.10954678803682327, 'timestamp': '2025-09-10 02:55:38.171061', 'step': 15994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:38.224889', 'step': 15994, 'epoch': 3} {'type': 'loss', 'content': 0.11064988374710083, 'timestamp': '2025-09-10 02:55:38.227304', 'step': 15995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:38.281092', 'step': 15995, 'epoch': 3} {'type': 'loss', 'content': 0.10166661441326141, 'timestamp': '2025-09-10 02:55:38.287136', 'step': 15996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:38.340811', 'step': 15996, 'epoch': 3} {'type': 'loss', 'content': 0.1214638277888298, 'timestamp': '2025-09-10 02:55:38.343062', 'step': 15997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:38.395945', 'step': 15997, 'epoch': 3} {'type': 'loss', 'content': 0.04953945428133011, 'timestamp': '2025-09-10 02:55:38.398073', 'step': 15998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:38.452158', 'step': 15998, 'epoch': 3} {'type': 'loss', 'content': 0.040053024888038635, 'timestamp': '2025-09-10 02:55:38.454592', 'step': 15999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:38.508945', 'step': 15999, 'epoch': 3} {'type': 'loss', 'content': 0.19683335721492767, 'timestamp': '2025-09-10 02:55:38.515049', 'step': 16000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 16000', 'timestamp': '2025-09-10 02:55:38.883342', 'step': 16000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:38.944367', 'step': 16000, 'epoch': 3} {'type': 'loss', 'content': 0.06470571458339691, 'timestamp': '2025-09-10 02:55:38.946564', 'step': 16001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:39.002973', 'step': 16001, 'epoch': 3} {'type': 'loss', 'content': 0.027449410408735275, 'timestamp': '2025-09-10 02:55:39.005096', 'step': 16002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:39.060117', 'step': 16002, 'epoch': 3} {'type': 'loss', 'content': 0.21674194931983948, 'timestamp': '2025-09-10 02:55:39.062392', 'step': 16003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:39.116885', 'step': 16003, 'epoch': 3} {'type': 'loss', 'content': 0.04464917257428169, 'timestamp': '2025-09-10 02:55:39.122968', 'step': 16004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:39.181557', 'step': 16004, 'epoch': 3} {'type': 'loss', 'content': 0.12463773041963577, 'timestamp': '2025-09-10 02:55:39.183638', 'step': 16005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:39.239210', 'step': 16005, 'epoch': 3} {'type': 'loss', 'content': 0.07550696283578873, 'timestamp': '2025-09-10 02:55:39.241408', 'step': 16006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:39.298326', 'step': 16006, 'epoch': 3} {'type': 'loss', 'content': 0.11010700464248657, 'timestamp': '2025-09-10 02:55:39.300492', 'step': 16007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:39.355975', 'step': 16007, 'epoch': 3} {'type': 'loss', 'content': 0.058062661439180374, 'timestamp': '2025-09-10 02:55:39.362035', 'step': 16008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:39.415152', 'step': 16008, 'epoch': 3} {'type': 'loss', 'content': 0.0533638671040535, 'timestamp': '2025-09-10 02:55:39.417185', 'step': 16009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:39.470405', 'step': 16009, 'epoch': 3} {'type': 'loss', 'content': 0.0938408300280571, 'timestamp': '2025-09-10 02:55:39.472579', 'step': 16010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:39.526111', 'step': 16010, 'epoch': 3} {'type': 'loss', 'content': 0.05826755240559578, 'timestamp': '2025-09-10 02:55:39.528182', 'step': 16011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:39.593967', 'step': 16011, 'epoch': 3} {'type': 'loss', 'content': 0.08410648256540298, 'timestamp': '2025-09-10 02:55:39.600604', 'step': 16012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:39.653852', 'step': 16012, 'epoch': 3} {'type': 'loss', 'content': 0.11980906128883362, 'timestamp': '2025-09-10 02:55:39.656025', 'step': 16013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:39.710195', 'step': 16013, 'epoch': 3} {'type': 'loss', 'content': 0.05888136848807335, 'timestamp': '2025-09-10 02:55:39.712429', 'step': 16014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:39.766900', 'step': 16014, 'epoch': 3} {'type': 'loss', 'content': 0.08491459488868713, 'timestamp': '2025-09-10 02:55:39.768899', 'step': 16015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:39.821830', 'step': 16015, 'epoch': 3} {'type': 'loss', 'content': 0.07952386885881424, 'timestamp': '2025-09-10 02:55:39.827854', 'step': 16016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:39.883264', 'step': 16016, 'epoch': 3} {'type': 'loss', 'content': 0.12041052430868149, 'timestamp': '2025-09-10 02:55:39.885740', 'step': 16017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:39.939921', 'step': 16017, 'epoch': 3} {'type': 'loss', 'content': 0.11800294369459152, 'timestamp': '2025-09-10 02:55:39.942019', 'step': 16018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:39.997732', 'step': 16018, 'epoch': 3} {'type': 'loss', 'content': 0.11259224265813828, 'timestamp': '2025-09-10 02:55:39.999773', 'step': 16019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:40.053698', 'step': 16019, 'epoch': 3} {'type': 'loss', 'content': 0.0650932565331459, 'timestamp': '2025-09-10 02:55:40.059933', 'step': 16020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:40.114603', 'step': 16020, 'epoch': 3} {'type': 'loss', 'content': 0.05230747163295746, 'timestamp': '2025-09-10 02:55:40.116776', 'step': 16021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:40.172407', 'step': 16021, 'epoch': 3} {'type': 'loss', 'content': 0.06574411690235138, 'timestamp': '2025-09-10 02:55:40.174483', 'step': 16022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:40.229147', 'step': 16022, 'epoch': 3} {'type': 'loss', 'content': 0.13342945277690887, 'timestamp': '2025-09-10 02:55:40.231188', 'step': 16023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:40.284522', 'step': 16023, 'epoch': 3} {'type': 'loss', 'content': 0.08548689633607864, 'timestamp': '2025-09-10 02:55:40.290488', 'step': 16024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:40.343720', 'step': 16024, 'epoch': 3} {'type': 'loss', 'content': 0.10535307973623276, 'timestamp': '2025-09-10 02:55:40.345794', 'step': 16025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:40.399011', 'step': 16025, 'epoch': 3} {'type': 'loss', 'content': 0.12194480746984482, 'timestamp': '2025-09-10 02:55:40.401052', 'step': 16026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:40.454620', 'step': 16026, 'epoch': 3} {'type': 'loss', 'content': 0.12243327498435974, 'timestamp': '2025-09-10 02:55:40.456807', 'step': 16027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:40.512988', 'step': 16027, 'epoch': 3} {'type': 'loss', 'content': 0.1364258974790573, 'timestamp': '2025-09-10 02:55:40.519028', 'step': 16028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:40.580556', 'step': 16028, 'epoch': 3} {'type': 'loss', 'content': 0.05056837573647499, 'timestamp': '2025-09-10 02:55:40.582424', 'step': 16029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:40.647694', 'step': 16029, 'epoch': 3} {'type': 'loss', 'content': 0.11413149535655975, 'timestamp': '2025-09-10 02:55:40.651603', 'step': 16030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:40.711733', 'step': 16030, 'epoch': 3} {'type': 'loss', 'content': 0.11764642596244812, 'timestamp': '2025-09-10 02:55:40.713921', 'step': 16031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:40.768214', 'step': 16031, 'epoch': 3} {'type': 'loss', 'content': 0.10516978800296783, 'timestamp': '2025-09-10 02:55:40.775570', 'step': 16032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:40.830365', 'step': 16032, 'epoch': 3} {'type': 'loss', 'content': 0.11962023377418518, 'timestamp': '2025-09-10 02:55:40.833861', 'step': 16033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:40.888365', 'step': 16033, 'epoch': 3} {'type': 'loss', 'content': 0.1213553324341774, 'timestamp': '2025-09-10 02:55:40.891726', 'step': 16034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:40.949582', 'step': 16034, 'epoch': 3} {'type': 'loss', 'content': 0.06821607053279877, 'timestamp': '2025-09-10 02:55:40.953139', 'step': 16035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:41.008647', 'step': 16035, 'epoch': 3} {'type': 'loss', 'content': 0.07451916486024857, 'timestamp': '2025-09-10 02:55:41.016060', 'step': 16036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:41.069659', 'step': 16036, 'epoch': 3} {'type': 'loss', 'content': 0.05745123699307442, 'timestamp': '2025-09-10 02:55:41.071750', 'step': 16037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:41.128935', 'step': 16037, 'epoch': 3} {'type': 'loss', 'content': 0.17719842493534088, 'timestamp': '2025-09-10 02:55:41.131158', 'step': 16038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:41.185898', 'step': 16038, 'epoch': 3} {'type': 'loss', 'content': 0.13078448176383972, 'timestamp': '2025-09-10 02:55:41.188198', 'step': 16039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:41.241686', 'step': 16039, 'epoch': 3} {'type': 'loss', 'content': 0.07226639240980148, 'timestamp': '2025-09-10 02:55:41.247900', 'step': 16040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:41.301103', 'step': 16040, 'epoch': 3} {'type': 'loss', 'content': 0.14342501759529114, 'timestamp': '2025-09-10 02:55:41.303368', 'step': 16041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:41.356272', 'step': 16041, 'epoch': 3} {'type': 'loss', 'content': 0.11061456799507141, 'timestamp': '2025-09-10 02:55:41.358528', 'step': 16042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:41.412051', 'step': 16042, 'epoch': 3} {'type': 'loss', 'content': 0.1003478392958641, 'timestamp': '2025-09-10 02:55:41.414429', 'step': 16043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:41.468621', 'step': 16043, 'epoch': 3} {'type': 'loss', 'content': 0.16100741922855377, 'timestamp': '2025-09-10 02:55:41.474512', 'step': 16044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:41.527403', 'step': 16044, 'epoch': 3} {'type': 'loss', 'content': 0.08528784662485123, 'timestamp': '2025-09-10 02:55:41.529491', 'step': 16045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:41.582979', 'step': 16045, 'epoch': 3} {'type': 'loss', 'content': 0.09152151644229889, 'timestamp': '2025-09-10 02:55:41.585005', 'step': 16046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:41.638848', 'step': 16046, 'epoch': 3} {'type': 'loss', 'content': 0.08861851692199707, 'timestamp': '2025-09-10 02:55:41.640725', 'step': 16047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:41.694509', 'step': 16047, 'epoch': 3} {'type': 'loss', 'content': 0.11142552644014359, 'timestamp': '2025-09-10 02:55:41.700914', 'step': 16048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:41.754271', 'step': 16048, 'epoch': 3} {'type': 'loss', 'content': 0.08845510333776474, 'timestamp': '2025-09-10 02:55:41.756506', 'step': 16049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:41.811729', 'step': 16049, 'epoch': 3} {'type': 'loss', 'content': 0.09890975058078766, 'timestamp': '2025-09-10 02:55:41.814089', 'step': 16050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:41.868375', 'step': 16050, 'epoch': 3} {'type': 'loss', 'content': 0.13035805523395538, 'timestamp': '2025-09-10 02:55:41.870735', 'step': 16051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:41.925425', 'step': 16051, 'epoch': 3} {'type': 'loss', 'content': 0.12163377553224564, 'timestamp': '2025-09-10 02:55:41.931484', 'step': 16052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:41.985945', 'step': 16052, 'epoch': 3} {'type': 'loss', 'content': 0.17762203514575958, 'timestamp': '2025-09-10 02:55:41.988196', 'step': 16053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:42.041843', 'step': 16053, 'epoch': 3} {'type': 'loss', 'content': 0.04014647752046585, 'timestamp': '2025-09-10 02:55:42.043915', 'step': 16054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:42.097367', 'step': 16054, 'epoch': 3} {'type': 'loss', 'content': 0.12650571763515472, 'timestamp': '2025-09-10 02:55:42.099568', 'step': 16055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:42.153308', 'step': 16055, 'epoch': 3} {'type': 'loss', 'content': 0.1810769885778427, 'timestamp': '2025-09-10 02:55:42.159491', 'step': 16056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:42.213377', 'step': 16056, 'epoch': 3} {'type': 'loss', 'content': 0.06681204587221146, 'timestamp': '2025-09-10 02:55:42.215842', 'step': 16057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:42.271446', 'step': 16057, 'epoch': 3} {'type': 'loss', 'content': 0.16198945045471191, 'timestamp': '2025-09-10 02:55:42.273650', 'step': 16058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:42.328398', 'step': 16058, 'epoch': 3} {'type': 'loss', 'content': 0.105974480509758, 'timestamp': '2025-09-10 02:55:42.330571', 'step': 16059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:42.385432', 'step': 16059, 'epoch': 3} {'type': 'loss', 'content': 0.03620273247361183, 'timestamp': '2025-09-10 02:55:42.391510', 'step': 16060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:42.444368', 'step': 16060, 'epoch': 3} {'type': 'loss', 'content': 0.06415797024965286, 'timestamp': '2025-09-10 02:55:42.446399', 'step': 16061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:42.500113', 'step': 16061, 'epoch': 3} {'type': 'loss', 'content': 0.11186134815216064, 'timestamp': '2025-09-10 02:55:42.502227', 'step': 16062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:42.556169', 'step': 16062, 'epoch': 3} {'type': 'loss', 'content': 0.09360899776220322, 'timestamp': '2025-09-10 02:55:42.558457', 'step': 16063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:42.612527', 'step': 16063, 'epoch': 3} {'type': 'loss', 'content': 0.11443611234426498, 'timestamp': '2025-09-10 02:55:42.618479', 'step': 16064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:55:42.673334', 'step': 16064, 'epoch': 3} {'type': 'loss', 'content': 0.1003437340259552, 'timestamp': '2025-09-10 02:55:42.675413', 'step': 16065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:42.728714', 'step': 16065, 'epoch': 3} {'type': 'loss', 'content': 0.06000993773341179, 'timestamp': '2025-09-10 02:55:42.731122', 'step': 16066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:42.784540', 'step': 16066, 'epoch': 3} {'type': 'loss', 'content': 0.09161803871393204, 'timestamp': '2025-09-10 02:55:42.786553', 'step': 16067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:42.840700', 'step': 16067, 'epoch': 3} {'type': 'loss', 'content': 0.05269838124513626, 'timestamp': '2025-09-10 02:55:42.846360', 'step': 16068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:42.900795', 'step': 16068, 'epoch': 3} {'type': 'loss', 'content': 0.20064677298069, 'timestamp': '2025-09-10 02:55:42.903147', 'step': 16069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:42.958322', 'step': 16069, 'epoch': 3} {'type': 'loss', 'content': 0.0877649113535881, 'timestamp': '2025-09-10 02:55:42.960723', 'step': 16070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:43.015749', 'step': 16070, 'epoch': 3} {'type': 'loss', 'content': 0.09444878995418549, 'timestamp': '2025-09-10 02:55:43.018071', 'step': 16071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:43.074543', 'step': 16071, 'epoch': 3} {'type': 'loss', 'content': 0.06827310472726822, 'timestamp': '2025-09-10 02:55:43.081000', 'step': 16072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:43.135615', 'step': 16072, 'epoch': 3} {'type': 'loss', 'content': 0.08602450788021088, 'timestamp': '2025-09-10 02:55:43.137904', 'step': 16073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:43.192431', 'step': 16073, 'epoch': 3} {'type': 'loss', 'content': 0.07666612416505814, 'timestamp': '2025-09-10 02:55:43.194728', 'step': 16074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:43.248933', 'step': 16074, 'epoch': 3} {'type': 'loss', 'content': 0.05105834826827049, 'timestamp': '2025-09-10 02:55:43.251216', 'step': 16075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:43.307945', 'step': 16075, 'epoch': 3} {'type': 'loss', 'content': 0.10125322639942169, 'timestamp': '2025-09-10 02:55:43.314456', 'step': 16076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:43.370202', 'step': 16076, 'epoch': 3} {'type': 'loss', 'content': 0.11026347428560257, 'timestamp': '2025-09-10 02:55:43.372473', 'step': 16077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:43.427387', 'step': 16077, 'epoch': 3} {'type': 'loss', 'content': 0.158164381980896, 'timestamp': '2025-09-10 02:55:43.429563', 'step': 16078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:43.485226', 'step': 16078, 'epoch': 3} {'type': 'loss', 'content': 0.04176953434944153, 'timestamp': '2025-09-10 02:55:43.487620', 'step': 16079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:43.543628', 'step': 16079, 'epoch': 3} {'type': 'loss', 'content': 0.1326945722103119, 'timestamp': '2025-09-10 02:55:43.549875', 'step': 16080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:43.604505', 'step': 16080, 'epoch': 3} {'type': 'loss', 'content': 0.11117766052484512, 'timestamp': '2025-09-10 02:55:43.606805', 'step': 16081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:43.661817', 'step': 16081, 'epoch': 3} {'type': 'loss', 'content': 0.14473533630371094, 'timestamp': '2025-09-10 02:55:43.664086', 'step': 16082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:43.720383', 'step': 16082, 'epoch': 3} {'type': 'loss', 'content': 0.06371443718671799, 'timestamp': '2025-09-10 02:55:43.722784', 'step': 16083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:43.778121', 'step': 16083, 'epoch': 3} {'type': 'loss', 'content': 0.04765104502439499, 'timestamp': '2025-09-10 02:55:43.784622', 'step': 16084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:43.840175', 'step': 16084, 'epoch': 3} {'type': 'loss', 'content': 0.0957879051566124, 'timestamp': '2025-09-10 02:55:43.842667', 'step': 16085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:43.897702', 'step': 16085, 'epoch': 3} {'type': 'loss', 'content': 0.0838852971792221, 'timestamp': '2025-09-10 02:55:43.900132', 'step': 16086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:43.955674', 'step': 16086, 'epoch': 3} {'type': 'loss', 'content': 0.09480549395084381, 'timestamp': '2025-09-10 02:55:43.957854', 'step': 16087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:44.012808', 'step': 16087, 'epoch': 3} {'type': 'loss', 'content': 0.040909819304943085, 'timestamp': '2025-09-10 02:55:44.019164', 'step': 16088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:44.073515', 'step': 16088, 'epoch': 3} {'type': 'loss', 'content': 0.07310284674167633, 'timestamp': '2025-09-10 02:55:44.075939', 'step': 16089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:44.131341', 'step': 16089, 'epoch': 3} {'type': 'loss', 'content': 0.1361541748046875, 'timestamp': '2025-09-10 02:55:44.133679', 'step': 16090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:44.189509', 'step': 16090, 'epoch': 3} {'type': 'loss', 'content': 0.10814572870731354, 'timestamp': '2025-09-10 02:55:44.191822', 'step': 16091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:44.247797', 'step': 16091, 'epoch': 3} {'type': 'loss', 'content': 0.12670433521270752, 'timestamp': '2025-09-10 02:55:44.254186', 'step': 16092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:44.308864', 'step': 16092, 'epoch': 3} {'type': 'loss', 'content': 0.0701722800731659, 'timestamp': '2025-09-10 02:55:44.311147', 'step': 16093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:44.366236', 'step': 16093, 'epoch': 3} {'type': 'loss', 'content': 0.11911003291606903, 'timestamp': '2025-09-10 02:55:44.368434', 'step': 16094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:44.423853', 'step': 16094, 'epoch': 3} {'type': 'loss', 'content': 0.10173669457435608, 'timestamp': '2025-09-10 02:55:44.426084', 'step': 16095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:44.481329', 'step': 16095, 'epoch': 3} {'type': 'loss', 'content': 0.1626930832862854, 'timestamp': '2025-09-10 02:55:44.487449', 'step': 16096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:44.541821', 'step': 16096, 'epoch': 3} {'type': 'loss', 'content': 0.07193193584680557, 'timestamp': '2025-09-10 02:55:44.544050', 'step': 16097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:44.598743', 'step': 16097, 'epoch': 3} {'type': 'loss', 'content': 0.10902424156665802, 'timestamp': '2025-09-10 02:55:44.601226', 'step': 16098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:44.657989', 'step': 16098, 'epoch': 3} {'type': 'loss', 'content': 0.10319440066814423, 'timestamp': '2025-09-10 02:55:44.660449', 'step': 16099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:44.716665', 'step': 16099, 'epoch': 3} {'type': 'loss', 'content': 0.14869019389152527, 'timestamp': '2025-09-10 02:55:44.723287', 'step': 16100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:44.780455', 'step': 16100, 'epoch': 3} {'type': 'loss', 'content': 0.10182259976863861, 'timestamp': '2025-09-10 02:55:44.782588', 'step': 16101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:44.837490', 'step': 16101, 'epoch': 3} {'type': 'loss', 'content': 0.07206965982913971, 'timestamp': '2025-09-10 02:55:44.839545', 'step': 16102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:44.897988', 'step': 16102, 'epoch': 3} {'type': 'loss', 'content': 0.12191981822252274, 'timestamp': '2025-09-10 02:55:44.900443', 'step': 16103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:44.961916', 'step': 16103, 'epoch': 3} {'type': 'loss', 'content': 0.10120463371276855, 'timestamp': '2025-09-10 02:55:44.968699', 'step': 16104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:45.024462', 'step': 16104, 'epoch': 3} {'type': 'loss', 'content': 0.027449971064925194, 'timestamp': '2025-09-10 02:55:45.026504', 'step': 16105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:45.083638', 'step': 16105, 'epoch': 3} {'type': 'loss', 'content': 0.05326080322265625, 'timestamp': '2025-09-10 02:55:45.085780', 'step': 16106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:45.142897', 'step': 16106, 'epoch': 3} {'type': 'loss', 'content': 0.15016892552375793, 'timestamp': '2025-09-10 02:55:45.144938', 'step': 16107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:45.202136', 'step': 16107, 'epoch': 3} {'type': 'loss', 'content': 0.15622811019420624, 'timestamp': '2025-09-10 02:55:45.208588', 'step': 16108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:45.263991', 'step': 16108, 'epoch': 3} {'type': 'loss', 'content': 0.05177317187190056, 'timestamp': '2025-09-10 02:55:45.266109', 'step': 16109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:45.321291', 'step': 16109, 'epoch': 3} {'type': 'loss', 'content': 0.08864986151456833, 'timestamp': '2025-09-10 02:55:45.323363', 'step': 16110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:45.378017', 'step': 16110, 'epoch': 3} {'type': 'loss', 'content': 0.16165943443775177, 'timestamp': '2025-09-10 02:55:45.380077', 'step': 16111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:45.435556', 'step': 16111, 'epoch': 3} {'type': 'loss', 'content': 0.052543457597494125, 'timestamp': '2025-09-10 02:55:45.441988', 'step': 16112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:45.497125', 'step': 16112, 'epoch': 3} {'type': 'loss', 'content': 0.12663687765598297, 'timestamp': '2025-09-10 02:55:45.499606', 'step': 16113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:45.555826', 'step': 16113, 'epoch': 3} {'type': 'loss', 'content': 0.10962674021720886, 'timestamp': '2025-09-10 02:55:45.558356', 'step': 16114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:45.613827', 'step': 16114, 'epoch': 3} {'type': 'loss', 'content': 0.09512783586978912, 'timestamp': '2025-09-10 02:55:45.616053', 'step': 16115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:45.671257', 'step': 16115, 'epoch': 3} {'type': 'loss', 'content': 0.0747426450252533, 'timestamp': '2025-09-10 02:55:45.677762', 'step': 16116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:45.733015', 'step': 16116, 'epoch': 3} {'type': 'loss', 'content': 0.0960230678319931, 'timestamp': '2025-09-10 02:55:45.735275', 'step': 16117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:45.790207', 'step': 16117, 'epoch': 3} {'type': 'loss', 'content': 0.11971630156040192, 'timestamp': '2025-09-10 02:55:45.792502', 'step': 16118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:45.847433', 'step': 16118, 'epoch': 3} {'type': 'loss', 'content': 0.0629524439573288, 'timestamp': '2025-09-10 02:55:45.849529', 'step': 16119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:45.905380', 'step': 16119, 'epoch': 3} {'type': 'loss', 'content': 0.0815478190779686, 'timestamp': '2025-09-10 02:55:45.911771', 'step': 16120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:45.966838', 'step': 16120, 'epoch': 3} {'type': 'loss', 'content': 0.025526169687509537, 'timestamp': '2025-09-10 02:55:45.969081', 'step': 16121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:46.024140', 'step': 16121, 'epoch': 3} {'type': 'loss', 'content': 0.045528341084718704, 'timestamp': '2025-09-10 02:55:46.026151', 'step': 16122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:46.082526', 'step': 16122, 'epoch': 3} {'type': 'loss', 'content': 0.09747865796089172, 'timestamp': '2025-09-10 02:55:46.084877', 'step': 16123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:46.139508', 'step': 16123, 'epoch': 3} {'type': 'loss', 'content': 0.09370473772287369, 'timestamp': '2025-09-10 02:55:46.145863', 'step': 16124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:46.200259', 'step': 16124, 'epoch': 3} {'type': 'loss', 'content': 0.0878492221236229, 'timestamp': '2025-09-10 02:55:46.202608', 'step': 16125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:46.257922', 'step': 16125, 'epoch': 3} {'type': 'loss', 'content': 0.1868966817855835, 'timestamp': '2025-09-10 02:55:46.260208', 'step': 16126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:46.316214', 'step': 16126, 'epoch': 3} {'type': 'loss', 'content': 0.09431479126214981, 'timestamp': '2025-09-10 02:55:46.320451', 'step': 16127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:46.376375', 'step': 16127, 'epoch': 3} {'type': 'loss', 'content': 0.06237058341503143, 'timestamp': '2025-09-10 02:55:46.382944', 'step': 16128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:46.437607', 'step': 16128, 'epoch': 3} {'type': 'loss', 'content': 0.06631109118461609, 'timestamp': '2025-09-10 02:55:46.439885', 'step': 16129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:46.494964', 'step': 16129, 'epoch': 3} {'type': 'loss', 'content': 0.17632021009922028, 'timestamp': '2025-09-10 02:55:46.497230', 'step': 16130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:46.555583', 'step': 16130, 'epoch': 3} {'type': 'loss', 'content': 0.03886561840772629, 'timestamp': '2025-09-10 02:55:46.557698', 'step': 16131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:55:46.613536', 'step': 16131, 'epoch': 3} {'type': 'loss', 'content': 0.14910338819026947, 'timestamp': '2025-09-10 02:55:46.619867', 'step': 16132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:46.675566', 'step': 16132, 'epoch': 3} {'type': 'loss', 'content': 0.05888503044843674, 'timestamp': '2025-09-10 02:55:46.677818', 'step': 16133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:46.733519', 'step': 16133, 'epoch': 3} {'type': 'loss', 'content': 0.08791998028755188, 'timestamp': '2025-09-10 02:55:46.735654', 'step': 16134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:46.791028', 'step': 16134, 'epoch': 3} {'type': 'loss', 'content': 0.10854751616716385, 'timestamp': '2025-09-10 02:55:46.793422', 'step': 16135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:46.849451', 'step': 16135, 'epoch': 3} {'type': 'loss', 'content': 0.0792311280965805, 'timestamp': '2025-09-10 02:55:46.855697', 'step': 16136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:46.910264', 'step': 16136, 'epoch': 3} {'type': 'loss', 'content': 0.1024215817451477, 'timestamp': '2025-09-10 02:55:46.912614', 'step': 16137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:46.967725', 'step': 16137, 'epoch': 3} {'type': 'loss', 'content': 0.08928333222866058, 'timestamp': '2025-09-10 02:55:46.970020', 'step': 16138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:47.024896', 'step': 16138, 'epoch': 3} {'type': 'loss', 'content': 0.10550066083669662, 'timestamp': '2025-09-10 02:55:47.027001', 'step': 16139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:47.083974', 'step': 16139, 'epoch': 3} {'type': 'loss', 'content': 0.09698104113340378, 'timestamp': '2025-09-10 02:55:47.090344', 'step': 16140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:47.145760', 'step': 16140, 'epoch': 3} {'type': 'loss', 'content': 0.11934971809387207, 'timestamp': '2025-09-10 02:55:47.148202', 'step': 16141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:47.202970', 'step': 16141, 'epoch': 3} {'type': 'loss', 'content': 0.07346003502607346, 'timestamp': '2025-09-10 02:55:47.205411', 'step': 16142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:47.261269', 'step': 16142, 'epoch': 3} {'type': 'loss', 'content': 0.088407002389431, 'timestamp': '2025-09-10 02:55:47.263699', 'step': 16143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:47.318920', 'step': 16143, 'epoch': 3} {'type': 'loss', 'content': 0.07151481509208679, 'timestamp': '2025-09-10 02:55:47.325272', 'step': 16144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:47.379560', 'step': 16144, 'epoch': 3} {'type': 'loss', 'content': 0.11117403954267502, 'timestamp': '2025-09-10 02:55:47.381814', 'step': 16145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:47.439515', 'step': 16145, 'epoch': 3} {'type': 'loss', 'content': 0.19361303746700287, 'timestamp': '2025-09-10 02:55:47.441697', 'step': 16146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:47.497100', 'step': 16146, 'epoch': 3} {'type': 'loss', 'content': 0.12281566113233566, 'timestamp': '2025-09-10 02:55:47.499422', 'step': 16147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:47.554655', 'step': 16147, 'epoch': 3} {'type': 'loss', 'content': 0.1325269192457199, 'timestamp': '2025-09-10 02:55:47.561121', 'step': 16148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:47.615463', 'step': 16148, 'epoch': 3} {'type': 'loss', 'content': 0.07447521388530731, 'timestamp': '2025-09-10 02:55:47.617676', 'step': 16149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:47.672303', 'step': 16149, 'epoch': 3} {'type': 'loss', 'content': 0.15804709494113922, 'timestamp': '2025-09-10 02:55:47.674538', 'step': 16150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:47.730023', 'step': 16150, 'epoch': 3} {'type': 'loss', 'content': 0.20091719925403595, 'timestamp': '2025-09-10 02:55:47.732183', 'step': 16151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:47.790035', 'step': 16151, 'epoch': 3} {'type': 'loss', 'content': 0.10577742755413055, 'timestamp': '2025-09-10 02:55:47.796552', 'step': 16152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:47.850648', 'step': 16152, 'epoch': 3} {'type': 'loss', 'content': 0.13324472308158875, 'timestamp': '2025-09-10 02:55:47.852703', 'step': 16153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:47.907640', 'step': 16153, 'epoch': 3} {'type': 'loss', 'content': 0.10646873712539673, 'timestamp': '2025-09-10 02:55:47.909957', 'step': 16154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:47.965127', 'step': 16154, 'epoch': 3} {'type': 'loss', 'content': 0.05715186893939972, 'timestamp': '2025-09-10 02:55:47.967565', 'step': 16155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:48.022884', 'step': 16155, 'epoch': 3} {'type': 'loss', 'content': 0.0550839863717556, 'timestamp': '2025-09-10 02:55:48.029265', 'step': 16156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:48.083585', 'step': 16156, 'epoch': 3} {'type': 'loss', 'content': 0.14367780089378357, 'timestamp': '2025-09-10 02:55:48.086003', 'step': 16157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:55:48.143654', 'step': 16157, 'epoch': 3} {'type': 'loss', 'content': 0.17889578640460968, 'timestamp': '2025-09-10 02:55:48.148659', 'step': 16158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:48.203845', 'step': 16158, 'epoch': 3} {'type': 'loss', 'content': 0.06015637144446373, 'timestamp': '2025-09-10 02:55:48.206146', 'step': 16159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:48.261161', 'step': 16159, 'epoch': 3} {'type': 'loss', 'content': 0.1404981166124344, 'timestamp': '2025-09-10 02:55:48.267553', 'step': 16160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:48.321773', 'step': 16160, 'epoch': 3} {'type': 'loss', 'content': 0.18858014047145844, 'timestamp': '2025-09-10 02:55:48.324069', 'step': 16161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:48.378809', 'step': 16161, 'epoch': 3} {'type': 'loss', 'content': 0.10031536221504211, 'timestamp': '2025-09-10 02:55:48.381144', 'step': 16162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:48.436078', 'step': 16162, 'epoch': 3} {'type': 'loss', 'content': 0.06869067251682281, 'timestamp': '2025-09-10 02:55:48.438495', 'step': 16163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:48.493296', 'step': 16163, 'epoch': 3} {'type': 'loss', 'content': 0.11309577524662018, 'timestamp': '2025-09-10 02:55:48.499654', 'step': 16164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:48.554219', 'step': 16164, 'epoch': 3} {'type': 'loss', 'content': 0.0936717838048935, 'timestamp': '2025-09-10 02:55:48.556499', 'step': 16165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:48.612236', 'step': 16165, 'epoch': 3} {'type': 'loss', 'content': 0.05164375156164169, 'timestamp': '2025-09-10 02:55:48.614574', 'step': 16166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:48.672395', 'step': 16166, 'epoch': 3} {'type': 'loss', 'content': 0.05990242958068848, 'timestamp': '2025-09-10 02:55:48.674689', 'step': 16167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:48.729712', 'step': 16167, 'epoch': 3} {'type': 'loss', 'content': 0.06573834270238876, 'timestamp': '2025-09-10 02:55:48.736093', 'step': 16168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:48.795118', 'step': 16168, 'epoch': 3} {'type': 'loss', 'content': 0.09801744669675827, 'timestamp': '2025-09-10 02:55:48.797447', 'step': 16169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:48.851933', 'step': 16169, 'epoch': 3} {'type': 'loss', 'content': 0.10166149586439133, 'timestamp': '2025-09-10 02:55:48.854190', 'step': 16170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:48.908724', 'step': 16170, 'epoch': 3} {'type': 'loss', 'content': 0.0905008465051651, 'timestamp': '2025-09-10 02:55:48.911077', 'step': 16171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:48.965528', 'step': 16171, 'epoch': 3} {'type': 'loss', 'content': 0.06720142811536789, 'timestamp': '2025-09-10 02:55:48.971888', 'step': 16172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:49.025961', 'step': 16172, 'epoch': 3} {'type': 'loss', 'content': 0.10554671287536621, 'timestamp': '2025-09-10 02:55:49.028130', 'step': 16173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:49.083298', 'step': 16173, 'epoch': 3} {'type': 'loss', 'content': 0.07327457517385483, 'timestamp': '2025-09-10 02:55:49.085599', 'step': 16174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:49.140428', 'step': 16174, 'epoch': 3} {'type': 'loss', 'content': 0.10208246111869812, 'timestamp': '2025-09-10 02:55:49.142731', 'step': 16175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:49.203334', 'step': 16175, 'epoch': 3} {'type': 'loss', 'content': 0.11518818885087967, 'timestamp': '2025-09-10 02:55:49.209704', 'step': 16176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:49.263716', 'step': 16176, 'epoch': 3} {'type': 'loss', 'content': 0.060815297067165375, 'timestamp': '2025-09-10 02:55:49.266462', 'step': 16177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:49.322018', 'step': 16177, 'epoch': 3} {'type': 'loss', 'content': 0.1544961780309677, 'timestamp': '2025-09-10 02:55:49.324713', 'step': 16178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:49.379925', 'step': 16178, 'epoch': 3} {'type': 'loss', 'content': 0.13359832763671875, 'timestamp': '2025-09-10 02:55:49.382446', 'step': 16179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:49.437594', 'step': 16179, 'epoch': 3} {'type': 'loss', 'content': 0.1471782922744751, 'timestamp': '2025-09-10 02:55:49.443845', 'step': 16180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:49.498974', 'step': 16180, 'epoch': 3} {'type': 'loss', 'content': 0.09007330238819122, 'timestamp': '2025-09-10 02:55:49.500985', 'step': 16181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:49.555712', 'step': 16181, 'epoch': 3} {'type': 'loss', 'content': 0.1772502362728119, 'timestamp': '2025-09-10 02:55:49.557748', 'step': 16182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:49.618658', 'step': 16182, 'epoch': 3} {'type': 'loss', 'content': 0.10849098116159439, 'timestamp': '2025-09-10 02:55:49.621043', 'step': 16183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:49.676736', 'step': 16183, 'epoch': 3} {'type': 'loss', 'content': 0.0413190983235836, 'timestamp': '2025-09-10 02:55:49.683191', 'step': 16184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:49.740645', 'step': 16184, 'epoch': 3} {'type': 'loss', 'content': 0.08049412071704865, 'timestamp': '2025-09-10 02:55:49.742991', 'step': 16185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:49.797930', 'step': 16185, 'epoch': 3} {'type': 'loss', 'content': 0.08203563839197159, 'timestamp': '2025-09-10 02:55:49.800352', 'step': 16186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:49.855307', 'step': 16186, 'epoch': 3} {'type': 'loss', 'content': 0.15787996351718903, 'timestamp': '2025-09-10 02:55:49.857557', 'step': 16187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:49.912116', 'step': 16187, 'epoch': 3} {'type': 'loss', 'content': 0.030762597918510437, 'timestamp': '2025-09-10 02:55:49.918512', 'step': 16188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:49.974399', 'step': 16188, 'epoch': 3} {'type': 'loss', 'content': 0.08100514858961105, 'timestamp': '2025-09-10 02:55:49.976661', 'step': 16189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:50.033284', 'step': 16189, 'epoch': 3} {'type': 'loss', 'content': 0.08770553022623062, 'timestamp': '2025-09-10 02:55:50.035481', 'step': 16190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:50.090562', 'step': 16190, 'epoch': 3} {'type': 'loss', 'content': 0.09632302820682526, 'timestamp': '2025-09-10 02:55:50.092959', 'step': 16191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:50.148418', 'step': 16191, 'epoch': 3} {'type': 'loss', 'content': 0.07685180753469467, 'timestamp': '2025-09-10 02:55:50.154829', 'step': 16192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:50.209707', 'step': 16192, 'epoch': 3} {'type': 'loss', 'content': 0.12899602949619293, 'timestamp': '2025-09-10 02:55:50.212033', 'step': 16193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:50.268541', 'step': 16193, 'epoch': 3} {'type': 'loss', 'content': 0.08245938271284103, 'timestamp': '2025-09-10 02:55:50.270808', 'step': 16194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:50.327432', 'step': 16194, 'epoch': 3} {'type': 'loss', 'content': 0.05559360980987549, 'timestamp': '2025-09-10 02:55:50.329920', 'step': 16195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:50.388486', 'step': 16195, 'epoch': 3} {'type': 'loss', 'content': 0.11629042029380798, 'timestamp': '2025-09-10 02:55:50.395004', 'step': 16196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:50.449880', 'step': 16196, 'epoch': 3} {'type': 'loss', 'content': 0.14990738034248352, 'timestamp': '2025-09-10 02:55:50.452186', 'step': 16197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:50.506392', 'step': 16197, 'epoch': 3} {'type': 'loss', 'content': 0.1236814558506012, 'timestamp': '2025-09-10 02:55:50.508745', 'step': 16198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:50.565526', 'step': 16198, 'epoch': 3} {'type': 'loss', 'content': 0.08158911764621735, 'timestamp': '2025-09-10 02:55:50.567727', 'step': 16199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:50.621790', 'step': 16199, 'epoch': 3} {'type': 'loss', 'content': 0.09828249365091324, 'timestamp': '2025-09-10 02:55:50.628236', 'step': 16200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:50.682498', 'step': 16200, 'epoch': 3} {'type': 'loss', 'content': 0.10877262055873871, 'timestamp': '2025-09-10 02:55:50.690898', 'step': 16201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:50.749878', 'step': 16201, 'epoch': 3} {'type': 'loss', 'content': 0.07708994299173355, 'timestamp': '2025-09-10 02:55:50.752244', 'step': 16202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:50.815174', 'step': 16202, 'epoch': 3} {'type': 'loss', 'content': 0.10983199626207352, 'timestamp': '2025-09-10 02:55:50.817797', 'step': 16203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:50.874687', 'step': 16203, 'epoch': 3} {'type': 'loss', 'content': 0.0846765860915184, 'timestamp': '2025-09-10 02:55:50.881176', 'step': 16204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:50.935808', 'step': 16204, 'epoch': 3} {'type': 'loss', 'content': 0.0553409643471241, 'timestamp': '2025-09-10 02:55:50.943288', 'step': 16205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:51.000813', 'step': 16205, 'epoch': 3} {'type': 'loss', 'content': 0.13709783554077148, 'timestamp': '2025-09-10 02:55:51.003284', 'step': 16206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:51.061075', 'step': 16206, 'epoch': 3} {'type': 'loss', 'content': 0.1487296223640442, 'timestamp': '2025-09-10 02:55:51.064734', 'step': 16207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:51.120964', 'step': 16207, 'epoch': 3} {'type': 'loss', 'content': 0.07876790314912796, 'timestamp': '2025-09-10 02:55:51.127383', 'step': 16208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:51.184504', 'step': 16208, 'epoch': 3} {'type': 'loss', 'content': 0.09621979296207428, 'timestamp': '2025-09-10 02:55:51.186862', 'step': 16209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:51.241746', 'step': 16209, 'epoch': 3} {'type': 'loss', 'content': 0.09581990540027618, 'timestamp': '2025-09-10 02:55:51.243989', 'step': 16210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:51.299215', 'step': 16210, 'epoch': 3} {'type': 'loss', 'content': 0.11935020983219147, 'timestamp': '2025-09-10 02:55:51.301462', 'step': 16211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:51.359549', 'step': 16211, 'epoch': 3} {'type': 'loss', 'content': 0.07182881236076355, 'timestamp': '2025-09-10 02:55:51.366000', 'step': 16212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:51.420309', 'step': 16212, 'epoch': 3} {'type': 'loss', 'content': 0.10871894657611847, 'timestamp': '2025-09-10 02:55:51.422826', 'step': 16213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:51.478174', 'step': 16213, 'epoch': 3} {'type': 'loss', 'content': 0.08017225563526154, 'timestamp': '2025-09-10 02:55:51.480654', 'step': 16214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:51.535774', 'step': 16214, 'epoch': 3} {'type': 'loss', 'content': 0.08552008867263794, 'timestamp': '2025-09-10 02:55:51.538031', 'step': 16215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:51.593970', 'step': 16215, 'epoch': 3} {'type': 'loss', 'content': 0.13836067914962769, 'timestamp': '2025-09-10 02:55:51.600378', 'step': 16216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:51.654981', 'step': 16216, 'epoch': 3} {'type': 'loss', 'content': 0.07821732759475708, 'timestamp': '2025-09-10 02:55:51.657064', 'step': 16217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:51.711348', 'step': 16217, 'epoch': 3} {'type': 'loss', 'content': 0.07976865768432617, 'timestamp': '2025-09-10 02:55:51.715223', 'step': 16218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:51.771999', 'step': 16218, 'epoch': 3} {'type': 'loss', 'content': 0.09731407463550568, 'timestamp': '2025-09-10 02:55:51.774154', 'step': 16219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:51.830188', 'step': 16219, 'epoch': 3} {'type': 'loss', 'content': 0.04582655057311058, 'timestamp': '2025-09-10 02:55:51.839727', 'step': 16220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:51.897823', 'step': 16220, 'epoch': 3} {'type': 'loss', 'content': 0.19973066449165344, 'timestamp': '2025-09-10 02:55:51.900341', 'step': 16221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:51.955692', 'step': 16221, 'epoch': 3} {'type': 'loss', 'content': 0.0877440795302391, 'timestamp': '2025-09-10 02:55:51.958035', 'step': 16222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:52.031893', 'step': 16222, 'epoch': 3} {'type': 'loss', 'content': 0.1497451364994049, 'timestamp': '2025-09-10 02:55:52.034098', 'step': 16223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:52.102084', 'step': 16223, 'epoch': 3} {'type': 'loss', 'content': 0.104612335562706, 'timestamp': '2025-09-10 02:55:52.111453', 'step': 16224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:52.171257', 'step': 16224, 'epoch': 3} {'type': 'loss', 'content': 0.09805899858474731, 'timestamp': '2025-09-10 02:55:52.173365', 'step': 16225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:52.230874', 'step': 16225, 'epoch': 3} {'type': 'loss', 'content': 0.18420688807964325, 'timestamp': '2025-09-10 02:55:52.232921', 'step': 16226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:52.288137', 'step': 16226, 'epoch': 3} {'type': 'loss', 'content': 0.11789660155773163, 'timestamp': '2025-09-10 02:55:52.290321', 'step': 16227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:52.347111', 'step': 16227, 'epoch': 3} {'type': 'loss', 'content': 0.10529176890850067, 'timestamp': '2025-09-10 02:55:52.353642', 'step': 16228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:52.409476', 'step': 16228, 'epoch': 3} {'type': 'loss', 'content': 0.05345972999930382, 'timestamp': '2025-09-10 02:55:52.411988', 'step': 16229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:52.467829', 'step': 16229, 'epoch': 3} {'type': 'loss', 'content': 0.0637631043791771, 'timestamp': '2025-09-10 02:55:52.470224', 'step': 16230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:52.526481', 'step': 16230, 'epoch': 3} {'type': 'loss', 'content': 0.08018941432237625, 'timestamp': '2025-09-10 02:55:52.528740', 'step': 16231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:52.587291', 'step': 16231, 'epoch': 3} {'type': 'loss', 'content': 0.07376845926046371, 'timestamp': '2025-09-10 02:55:52.593739', 'step': 16232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:52.648465', 'step': 16232, 'epoch': 3} {'type': 'loss', 'content': 0.10545208305120468, 'timestamp': '2025-09-10 02:55:52.650927', 'step': 16233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:52.706969', 'step': 16233, 'epoch': 3} {'type': 'loss', 'content': 0.10031888633966446, 'timestamp': '2025-09-10 02:55:52.709392', 'step': 16234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:52.773357', 'step': 16234, 'epoch': 3} {'type': 'loss', 'content': 0.03473692759871483, 'timestamp': '2025-09-10 02:55:52.775307', 'step': 16235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:52.834179', 'step': 16235, 'epoch': 3} {'type': 'loss', 'content': 0.12001684308052063, 'timestamp': '2025-09-10 02:55:52.840474', 'step': 16236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:52.894085', 'step': 16236, 'epoch': 3} {'type': 'loss', 'content': 0.05129839852452278, 'timestamp': '2025-09-10 02:55:52.896406', 'step': 16237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:52.951475', 'step': 16237, 'epoch': 3} {'type': 'loss', 'content': 0.06811220943927765, 'timestamp': '2025-09-10 02:55:52.954573', 'step': 16238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:53.010862', 'step': 16238, 'epoch': 3} {'type': 'loss', 'content': 0.11851109564304352, 'timestamp': '2025-09-10 02:55:53.013139', 'step': 16239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:53.068082', 'step': 16239, 'epoch': 3} {'type': 'loss', 'content': 0.03312525525689125, 'timestamp': '2025-09-10 02:55:53.074139', 'step': 16240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:53.127932', 'step': 16240, 'epoch': 3} {'type': 'loss', 'content': 0.13492128252983093, 'timestamp': '2025-09-10 02:55:53.129771', 'step': 16241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:55:53.183839', 'step': 16241, 'epoch': 3} {'type': 'loss', 'content': 0.07585535198450089, 'timestamp': '2025-09-10 02:55:53.185892', 'step': 16242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:53.240744', 'step': 16242, 'epoch': 3} {'type': 'loss', 'content': 0.0838463231921196, 'timestamp': '2025-09-10 02:55:53.243307', 'step': 16243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:53.297438', 'step': 16243, 'epoch': 3} {'type': 'loss', 'content': 0.10777519643306732, 'timestamp': '2025-09-10 02:55:53.303702', 'step': 16244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:53.356797', 'step': 16244, 'epoch': 3} {'type': 'loss', 'content': 0.1621851772069931, 'timestamp': '2025-09-10 02:55:53.359096', 'step': 16245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:53.412520', 'step': 16245, 'epoch': 3} {'type': 'loss', 'content': 0.047594379633665085, 'timestamp': '2025-09-10 02:55:53.415006', 'step': 16246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:53.469155', 'step': 16246, 'epoch': 3} {'type': 'loss', 'content': 0.1122133657336235, 'timestamp': '2025-09-10 02:55:53.471632', 'step': 16247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:53.524927', 'step': 16247, 'epoch': 3} {'type': 'loss', 'content': 0.06637365370988846, 'timestamp': '2025-09-10 02:55:53.530899', 'step': 16248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:53.583954', 'step': 16248, 'epoch': 3} {'type': 'loss', 'content': 0.1198154091835022, 'timestamp': '2025-09-10 02:55:53.586141', 'step': 16249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:53.639870', 'step': 16249, 'epoch': 3} {'type': 'loss', 'content': 0.05036249756813049, 'timestamp': '2025-09-10 02:55:53.641852', 'step': 16250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:53.695485', 'step': 16250, 'epoch': 3} {'type': 'loss', 'content': 0.0627860352396965, 'timestamp': '2025-09-10 02:55:53.697389', 'step': 16251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:53.753671', 'step': 16251, 'epoch': 3} {'type': 'loss', 'content': 0.1858598291873932, 'timestamp': '2025-09-10 02:55:53.759832', 'step': 16252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:53.812754', 'step': 16252, 'epoch': 3} {'type': 'loss', 'content': 0.11383311450481415, 'timestamp': '2025-09-10 02:55:53.815157', 'step': 16253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:55:53.870758', 'step': 16253, 'epoch': 3} {'type': 'loss', 'content': 0.14199316501617432, 'timestamp': '2025-09-10 02:55:53.873141', 'step': 16254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:53.926495', 'step': 16254, 'epoch': 3} {'type': 'loss', 'content': 0.12596173584461212, 'timestamp': '2025-09-10 02:55:53.929056', 'step': 16255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:53.982950', 'step': 16255, 'epoch': 3} {'type': 'loss', 'content': 0.07313757389783859, 'timestamp': '2025-09-10 02:55:53.989506', 'step': 16256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:54.043631', 'step': 16256, 'epoch': 3} {'type': 'loss', 'content': 0.046700458973646164, 'timestamp': '2025-09-10 02:55:54.045919', 'step': 16257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:54.099678', 'step': 16257, 'epoch': 3} {'type': 'loss', 'content': 0.05401240289211273, 'timestamp': '2025-09-10 02:55:54.101886', 'step': 16258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:54.156076', 'step': 16258, 'epoch': 3} {'type': 'loss', 'content': 0.09041135758161545, 'timestamp': '2025-09-10 02:55:54.158159', 'step': 16259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:54.211600', 'step': 16259, 'epoch': 3} {'type': 'loss', 'content': 0.02737892046570778, 'timestamp': '2025-09-10 02:55:54.217425', 'step': 16260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:54.270715', 'step': 16260, 'epoch': 3} {'type': 'loss', 'content': 0.09980335086584091, 'timestamp': '2025-09-10 02:55:54.274076', 'step': 16261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:54.329841', 'step': 16261, 'epoch': 3} {'type': 'loss', 'content': 0.1017046645283699, 'timestamp': '2025-09-10 02:55:54.332021', 'step': 16262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:54.385945', 'step': 16262, 'epoch': 3} {'type': 'loss', 'content': 0.05388224124908447, 'timestamp': '2025-09-10 02:55:54.388191', 'step': 16263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:54.447873', 'step': 16263, 'epoch': 3} {'type': 'loss', 'content': 0.044919807463884354, 'timestamp': '2025-09-10 02:55:54.454171', 'step': 16264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:54.525020', 'step': 16264, 'epoch': 3} {'type': 'loss', 'content': 0.13567127287387848, 'timestamp': '2025-09-10 02:55:54.527135', 'step': 16265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:54.586577', 'step': 16265, 'epoch': 3} {'type': 'loss', 'content': 0.0633608028292656, 'timestamp': '2025-09-10 02:55:54.588586', 'step': 16266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:54.642886', 'step': 16266, 'epoch': 3} {'type': 'loss', 'content': 0.07912424951791763, 'timestamp': '2025-09-10 02:55:54.645385', 'step': 16267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:54.700299', 'step': 16267, 'epoch': 3} {'type': 'loss', 'content': 0.14978744089603424, 'timestamp': '2025-09-10 02:55:54.706662', 'step': 16268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:54.759672', 'step': 16268, 'epoch': 3} {'type': 'loss', 'content': 0.14023742079734802, 'timestamp': '2025-09-10 02:55:54.762105', 'step': 16269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:54.815978', 'step': 16269, 'epoch': 3} {'type': 'loss', 'content': 0.06871512532234192, 'timestamp': '2025-09-10 02:55:54.818475', 'step': 16270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:54.875228', 'step': 16270, 'epoch': 3} {'type': 'loss', 'content': 0.10506603121757507, 'timestamp': '2025-09-10 02:55:54.877896', 'step': 16271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:54.931809', 'step': 16271, 'epoch': 3} {'type': 'loss', 'content': 0.03705555573105812, 'timestamp': '2025-09-10 02:55:54.938005', 'step': 16272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:54.991235', 'step': 16272, 'epoch': 3} {'type': 'loss', 'content': 0.09266408532857895, 'timestamp': '2025-09-10 02:55:54.998625', 'step': 16273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:55.053326', 'step': 16273, 'epoch': 3} {'type': 'loss', 'content': 0.09415559470653534, 'timestamp': '2025-09-10 02:55:55.055213', 'step': 16274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:55.109785', 'step': 16274, 'epoch': 3} {'type': 'loss', 'content': 0.18732909858226776, 'timestamp': '2025-09-10 02:55:55.111763', 'step': 16275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:55.165442', 'step': 16275, 'epoch': 3} {'type': 'loss', 'content': 0.08901911228895187, 'timestamp': '2025-09-10 02:55:55.171415', 'step': 16276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:55.224687', 'step': 16276, 'epoch': 3} {'type': 'loss', 'content': 0.13380911946296692, 'timestamp': '2025-09-10 02:55:55.227069', 'step': 16277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:55.281532', 'step': 16277, 'epoch': 3} {'type': 'loss', 'content': 0.1469109058380127, 'timestamp': '2025-09-10 02:55:55.283695', 'step': 16278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:55.337702', 'step': 16278, 'epoch': 3} {'type': 'loss', 'content': 0.14750836789608002, 'timestamp': '2025-09-10 02:55:55.339996', 'step': 16279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:55.393985', 'step': 16279, 'epoch': 3} {'type': 'loss', 'content': 0.15740518271923065, 'timestamp': '2025-09-10 02:55:55.400012', 'step': 16280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:55.454113', 'step': 16280, 'epoch': 3} {'type': 'loss', 'content': 0.09935807436704636, 'timestamp': '2025-09-10 02:55:55.455900', 'step': 16281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:55.510063', 'step': 16281, 'epoch': 3} {'type': 'loss', 'content': 0.05555527284741402, 'timestamp': '2025-09-10 02:55:55.511964', 'step': 16282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:55.566233', 'step': 16282, 'epoch': 3} {'type': 'loss', 'content': 0.09236747026443481, 'timestamp': '2025-09-10 02:55:55.568040', 'step': 16283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:55.631195', 'step': 16283, 'epoch': 3} {'type': 'loss', 'content': 0.12357795238494873, 'timestamp': '2025-09-10 02:55:55.636926', 'step': 16284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:55.691776', 'step': 16284, 'epoch': 3} {'type': 'loss', 'content': 0.06315656751394272, 'timestamp': '2025-09-10 02:55:55.694089', 'step': 16285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:55.747886', 'step': 16285, 'epoch': 3} {'type': 'loss', 'content': 0.08596878498792648, 'timestamp': '2025-09-10 02:55:55.750407', 'step': 16286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:55.809766', 'step': 16286, 'epoch': 3} {'type': 'loss', 'content': 0.131977841258049, 'timestamp': '2025-09-10 02:55:55.812362', 'step': 16287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:55.868677', 'step': 16287, 'epoch': 3} {'type': 'loss', 'content': 0.06445597857236862, 'timestamp': '2025-09-10 02:55:55.874837', 'step': 16288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:55.929623', 'step': 16288, 'epoch': 3} {'type': 'loss', 'content': 0.07390203326940536, 'timestamp': '2025-09-10 02:55:55.932050', 'step': 16289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:55.986122', 'step': 16289, 'epoch': 3} {'type': 'loss', 'content': 0.09622896462678909, 'timestamp': '2025-09-10 02:55:55.988379', 'step': 16290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:56.042429', 'step': 16290, 'epoch': 3} {'type': 'loss', 'content': 0.12167803198099136, 'timestamp': '2025-09-10 02:55:56.044292', 'step': 16291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:56.099461', 'step': 16291, 'epoch': 3} {'type': 'loss', 'content': 0.08351591229438782, 'timestamp': '2025-09-10 02:55:56.105010', 'step': 16292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:56.161357', 'step': 16292, 'epoch': 3} {'type': 'loss', 'content': 0.014520651660859585, 'timestamp': '2025-09-10 02:55:56.163650', 'step': 16293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:56.222915', 'step': 16293, 'epoch': 3} {'type': 'loss', 'content': 0.15605507791042328, 'timestamp': '2025-09-10 02:55:56.225305', 'step': 16294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:56.279537', 'step': 16294, 'epoch': 3} {'type': 'loss', 'content': 0.07588659971952438, 'timestamp': '2025-09-10 02:55:56.281729', 'step': 16295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:56.335451', 'step': 16295, 'epoch': 3} {'type': 'loss', 'content': 0.1632748246192932, 'timestamp': '2025-09-10 02:55:56.341500', 'step': 16296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:56.394847', 'step': 16296, 'epoch': 3} {'type': 'loss', 'content': 0.07748716324567795, 'timestamp': '2025-09-10 02:55:56.397011', 'step': 16297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:56.450086', 'step': 16297, 'epoch': 3} {'type': 'loss', 'content': 0.19401511549949646, 'timestamp': '2025-09-10 02:55:56.452441', 'step': 16298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:56.507199', 'step': 16298, 'epoch': 3} {'type': 'loss', 'content': 0.10646999627351761, 'timestamp': '2025-09-10 02:55:56.509248', 'step': 16299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:56.562551', 'step': 16299, 'epoch': 3} {'type': 'loss', 'content': 0.048288494348526, 'timestamp': '2025-09-10 02:55:56.568328', 'step': 16300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:56.623461', 'step': 16300, 'epoch': 3} {'type': 'loss', 'content': 0.08797510713338852, 'timestamp': '2025-09-10 02:55:56.625242', 'step': 16301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:56.677974', 'step': 16301, 'epoch': 3} {'type': 'loss', 'content': 0.06196684017777443, 'timestamp': '2025-09-10 02:55:56.680230', 'step': 16302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:56.734094', 'step': 16302, 'epoch': 3} {'type': 'loss', 'content': 0.08494099229574203, 'timestamp': '2025-09-10 02:55:56.736446', 'step': 16303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:56.791007', 'step': 16303, 'epoch': 3} {'type': 'loss', 'content': 0.036588992923498154, 'timestamp': '2025-09-10 02:55:56.796891', 'step': 16304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:56.849853', 'step': 16304, 'epoch': 3} {'type': 'loss', 'content': 0.09629014134407043, 'timestamp': '2025-09-10 02:55:56.852127', 'step': 16305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:56.906166', 'step': 16305, 'epoch': 3} {'type': 'loss', 'content': 0.10868368297815323, 'timestamp': '2025-09-10 02:55:56.908449', 'step': 16306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:56.962010', 'step': 16306, 'epoch': 3} {'type': 'loss', 'content': 0.07992018759250641, 'timestamp': '2025-09-10 02:55:56.963927', 'step': 16307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:55:57.017337', 'step': 16307, 'epoch': 3} {'type': 'loss', 'content': 0.08772207796573639, 'timestamp': '2025-09-10 02:55:57.024248', 'step': 16308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:57.077939', 'step': 16308, 'epoch': 3} {'type': 'loss', 'content': 0.12508152425289154, 'timestamp': '2025-09-10 02:55:57.080282', 'step': 16309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:55:57.134118', 'step': 16309, 'epoch': 3} {'type': 'loss', 'content': 0.1212063655257225, 'timestamp': '2025-09-10 02:55:57.136444', 'step': 16310, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:56:10.023381', 'step': 16310, 'epoch': 3} {'type': 'pplx', 'content': 11548.271380668344, 'timestamp': '2025-09-10 02:56:10.026568', 'step': 16310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:10.082710', 'step': 16310, 'epoch': 3} {'type': 'loss', 'content': 0.1244579628109932, 'timestamp': '2025-09-10 02:56:10.085020', 'step': 16311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:56:10.141447', 'step': 16311, 'epoch': 3} {'type': 'loss', 'content': 0.03575703874230385, 'timestamp': '2025-09-10 02:56:10.147655', 'step': 16312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:10.202162', 'step': 16312, 'epoch': 3} {'type': 'loss', 'content': 0.08065760135650635, 'timestamp': '2025-09-10 02:56:10.204478', 'step': 16313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:10.259289', 'step': 16313, 'epoch': 3} {'type': 'loss', 'content': 0.1278401017189026, 'timestamp': '2025-09-10 02:56:10.261614', 'step': 16314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:10.316405', 'step': 16314, 'epoch': 3} {'type': 'loss', 'content': 0.06342510133981705, 'timestamp': '2025-09-10 02:56:10.318764', 'step': 16315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:10.373806', 'step': 16315, 'epoch': 3} {'type': 'loss', 'content': 0.15797089040279388, 'timestamp': '2025-09-10 02:56:10.380600', 'step': 16316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:10.434809', 'step': 16316, 'epoch': 3} {'type': 'loss', 'content': 0.08987229317426682, 'timestamp': '2025-09-10 02:56:10.437045', 'step': 16317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:10.491789', 'step': 16317, 'epoch': 3} {'type': 'loss', 'content': 0.03883535787463188, 'timestamp': '2025-09-10 02:56:10.494014', 'step': 16318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:10.548745', 'step': 16318, 'epoch': 3} {'type': 'loss', 'content': 0.09979768842458725, 'timestamp': '2025-09-10 02:56:10.551257', 'step': 16319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:10.605973', 'step': 16319, 'epoch': 3} {'type': 'loss', 'content': 0.08531811833381653, 'timestamp': '2025-09-10 02:56:10.612228', 'step': 16320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:10.670687', 'step': 16320, 'epoch': 3} {'type': 'loss', 'content': 0.12022611498832703, 'timestamp': '2025-09-10 02:56:10.672928', 'step': 16321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:10.727273', 'step': 16321, 'epoch': 3} {'type': 'loss', 'content': 0.07206115126609802, 'timestamp': '2025-09-10 02:56:10.729672', 'step': 16322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:56:10.783494', 'step': 16322, 'epoch': 3} {'type': 'loss', 'content': 0.036823734641075134, 'timestamp': '2025-09-10 02:56:10.785824', 'step': 16323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:10.840607', 'step': 16323, 'epoch': 3} {'type': 'loss', 'content': 0.10940642654895782, 'timestamp': '2025-09-10 02:56:10.847074', 'step': 16324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:10.901897', 'step': 16324, 'epoch': 3} {'type': 'loss', 'content': 0.09420856833457947, 'timestamp': '2025-09-10 02:56:10.904033', 'step': 16325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:10.958353', 'step': 16325, 'epoch': 3} {'type': 'loss', 'content': 0.06697429716587067, 'timestamp': '2025-09-10 02:56:10.961153', 'step': 16326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:11.015628', 'step': 16326, 'epoch': 3} {'type': 'loss', 'content': 0.12775059044361115, 'timestamp': '2025-09-10 02:56:11.018150', 'step': 16327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:11.072998', 'step': 16327, 'epoch': 3} {'type': 'loss', 'content': 0.16854125261306763, 'timestamp': '2025-09-10 02:56:11.080733', 'step': 16328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:11.135492', 'step': 16328, 'epoch': 3} {'type': 'loss', 'content': 0.05421391874551773, 'timestamp': '2025-09-10 02:56:11.137815', 'step': 16329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:11.192516', 'step': 16329, 'epoch': 3} {'type': 'loss', 'content': 0.05682249367237091, 'timestamp': '2025-09-10 02:56:11.194969', 'step': 16330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:11.248676', 'step': 16330, 'epoch': 3} {'type': 'loss', 'content': 0.0853784903883934, 'timestamp': '2025-09-10 02:56:11.250917', 'step': 16331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:11.304348', 'step': 16331, 'epoch': 3} {'type': 'loss', 'content': 0.05181252583861351, 'timestamp': '2025-09-10 02:56:11.310442', 'step': 16332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:11.364589', 'step': 16332, 'epoch': 3} {'type': 'loss', 'content': 0.13795843720436096, 'timestamp': '2025-09-10 02:56:11.366828', 'step': 16333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:11.420818', 'step': 16333, 'epoch': 3} {'type': 'loss', 'content': 0.09815867990255356, 'timestamp': '2025-09-10 02:56:11.423025', 'step': 16334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:11.477222', 'step': 16334, 'epoch': 3} {'type': 'loss', 'content': 0.11064279824495316, 'timestamp': '2025-09-10 02:56:11.479496', 'step': 16335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:11.533473', 'step': 16335, 'epoch': 3} {'type': 'loss', 'content': 0.07461857050657272, 'timestamp': '2025-09-10 02:56:11.539767', 'step': 16336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:11.594438', 'step': 16336, 'epoch': 3} {'type': 'loss', 'content': 0.23119065165519714, 'timestamp': '2025-09-10 02:56:11.596776', 'step': 16337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:11.652290', 'step': 16337, 'epoch': 3} {'type': 'loss', 'content': 0.11334078013896942, 'timestamp': '2025-09-10 02:56:11.656557', 'step': 16338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:11.713456', 'step': 16338, 'epoch': 3} {'type': 'loss', 'content': 0.09867597371339798, 'timestamp': '2025-09-10 02:56:11.715861', 'step': 16339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:56:11.770538', 'step': 16339, 'epoch': 3} {'type': 'loss', 'content': 0.1703972965478897, 'timestamp': '2025-09-10 02:56:11.776603', 'step': 16340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:11.831740', 'step': 16340, 'epoch': 3} {'type': 'loss', 'content': 0.06179104372859001, 'timestamp': '2025-09-10 02:56:11.834257', 'step': 16341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:11.889339', 'step': 16341, 'epoch': 3} {'type': 'loss', 'content': 0.16938114166259766, 'timestamp': '2025-09-10 02:56:11.891438', 'step': 16342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:11.946106', 'step': 16342, 'epoch': 3} {'type': 'loss', 'content': 0.08118633180856705, 'timestamp': '2025-09-10 02:56:11.948019', 'step': 16343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:12.003977', 'step': 16343, 'epoch': 3} {'type': 'loss', 'content': 0.15427720546722412, 'timestamp': '2025-09-10 02:56:12.009803', 'step': 16344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:12.063348', 'step': 16344, 'epoch': 3} {'type': 'loss', 'content': 0.09895534068346024, 'timestamp': '2025-09-10 02:56:12.065360', 'step': 16345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:12.118506', 'step': 16345, 'epoch': 3} {'type': 'loss', 'content': 0.1805024892091751, 'timestamp': '2025-09-10 02:56:12.120678', 'step': 16346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:12.174344', 'step': 16346, 'epoch': 3} {'type': 'loss', 'content': 0.06934311240911484, 'timestamp': '2025-09-10 02:56:12.176543', 'step': 16347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:12.229840', 'step': 16347, 'epoch': 3} {'type': 'loss', 'content': 0.07345184683799744, 'timestamp': '2025-09-10 02:56:12.235937', 'step': 16348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:12.288742', 'step': 16348, 'epoch': 3} {'type': 'loss', 'content': 0.03938028961420059, 'timestamp': '2025-09-10 02:56:12.290887', 'step': 16349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:12.344984', 'step': 16349, 'epoch': 3} {'type': 'loss', 'content': 0.12572035193443298, 'timestamp': '2025-09-10 02:56:12.348914', 'step': 16350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:12.405298', 'step': 16350, 'epoch': 3} {'type': 'loss', 'content': 0.0794813334941864, 'timestamp': '2025-09-10 02:56:12.407645', 'step': 16351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:56:12.463084', 'step': 16351, 'epoch': 3} {'type': 'loss', 'content': 0.13171209394931793, 'timestamp': '2025-09-10 02:56:12.469815', 'step': 16352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:12.524250', 'step': 16352, 'epoch': 3} {'type': 'loss', 'content': 0.07953458279371262, 'timestamp': '2025-09-10 02:56:12.526496', 'step': 16353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:12.584881', 'step': 16353, 'epoch': 3} {'type': 'loss', 'content': 0.1392701417207718, 'timestamp': '2025-09-10 02:56:12.587200', 'step': 16354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:12.644240', 'step': 16354, 'epoch': 3} {'type': 'loss', 'content': 0.06219785660505295, 'timestamp': '2025-09-10 02:56:12.646291', 'step': 16355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:12.701686', 'step': 16355, 'epoch': 3} {'type': 'loss', 'content': 0.05050184205174446, 'timestamp': '2025-09-10 02:56:12.708108', 'step': 16356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:12.762949', 'step': 16356, 'epoch': 3} {'type': 'loss', 'content': 0.08674722909927368, 'timestamp': '2025-09-10 02:56:12.764963', 'step': 16357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:12.821805', 'step': 16357, 'epoch': 3} {'type': 'loss', 'content': 0.09139341115951538, 'timestamp': '2025-09-10 02:56:12.824094', 'step': 16358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:12.889486', 'step': 16358, 'epoch': 3} {'type': 'loss', 'content': 0.07096931338310242, 'timestamp': '2025-09-10 02:56:12.891639', 'step': 16359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:12.947187', 'step': 16359, 'epoch': 3} {'type': 'loss', 'content': 0.09462472051382065, 'timestamp': '2025-09-10 02:56:12.953610', 'step': 16360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:13.007546', 'step': 16360, 'epoch': 3} {'type': 'loss', 'content': 0.04849551245570183, 'timestamp': '2025-09-10 02:56:13.009754', 'step': 16361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:13.065640', 'step': 16361, 'epoch': 3} {'type': 'loss', 'content': 0.12415315210819244, 'timestamp': '2025-09-10 02:56:13.067905', 'step': 16362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:13.122398', 'step': 16362, 'epoch': 3} {'type': 'loss', 'content': 0.11121399700641632, 'timestamp': '2025-09-10 02:56:13.124469', 'step': 16363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:13.179164', 'step': 16363, 'epoch': 3} {'type': 'loss', 'content': 0.1291600465774536, 'timestamp': '2025-09-10 02:56:13.185145', 'step': 16364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:13.238732', 'step': 16364, 'epoch': 3} {'type': 'loss', 'content': 0.22933289408683777, 'timestamp': '2025-09-10 02:56:13.241165', 'step': 16365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:13.294621', 'step': 16365, 'epoch': 3} {'type': 'loss', 'content': 0.040452368557453156, 'timestamp': '2025-09-10 02:56:13.297050', 'step': 16366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:13.351350', 'step': 16366, 'epoch': 3} {'type': 'loss', 'content': 0.09225050359964371, 'timestamp': '2025-09-10 02:56:13.353566', 'step': 16367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:13.408024', 'step': 16367, 'epoch': 3} {'type': 'loss', 'content': 0.16972629725933075, 'timestamp': '2025-09-10 02:56:13.414200', 'step': 16368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:13.468074', 'step': 16368, 'epoch': 3} {'type': 'loss', 'content': 0.08559712022542953, 'timestamp': '2025-09-10 02:56:13.471770', 'step': 16369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:13.528495', 'step': 16369, 'epoch': 3} {'type': 'loss', 'content': 0.11724451184272766, 'timestamp': '2025-09-10 02:56:13.530608', 'step': 16370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:13.585321', 'step': 16370, 'epoch': 3} {'type': 'loss', 'content': 0.12982624769210815, 'timestamp': '2025-09-10 02:56:13.587420', 'step': 16371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:13.641514', 'step': 16371, 'epoch': 3} {'type': 'loss', 'content': 0.07219857722520828, 'timestamp': '2025-09-10 02:56:13.647536', 'step': 16372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:13.700507', 'step': 16372, 'epoch': 3} {'type': 'loss', 'content': 0.10678818076848984, 'timestamp': '2025-09-10 02:56:13.702553', 'step': 16373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:13.757632', 'step': 16373, 'epoch': 3} {'type': 'loss', 'content': 0.041348278522491455, 'timestamp': '2025-09-10 02:56:13.759855', 'step': 16374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:13.813273', 'step': 16374, 'epoch': 3} {'type': 'loss', 'content': 0.16887684166431427, 'timestamp': '2025-09-10 02:56:13.815533', 'step': 16375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:13.868880', 'step': 16375, 'epoch': 3} {'type': 'loss', 'content': 0.03186335414648056, 'timestamp': '2025-09-10 02:56:13.875214', 'step': 16376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:13.930274', 'step': 16376, 'epoch': 3} {'type': 'loss', 'content': 0.043033063411712646, 'timestamp': '2025-09-10 02:56:13.932529', 'step': 16377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:13.986254', 'step': 16377, 'epoch': 3} {'type': 'loss', 'content': 0.054419487714767456, 'timestamp': '2025-09-10 02:56:13.988732', 'step': 16378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:14.042730', 'step': 16378, 'epoch': 3} {'type': 'loss', 'content': 0.09837526828050613, 'timestamp': '2025-09-10 02:56:14.044967', 'step': 16379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:14.099471', 'step': 16379, 'epoch': 3} {'type': 'loss', 'content': 0.09882321953773499, 'timestamp': '2025-09-10 02:56:14.105812', 'step': 16380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:14.158515', 'step': 16380, 'epoch': 3} {'type': 'loss', 'content': 0.08918711543083191, 'timestamp': '2025-09-10 02:56:14.160787', 'step': 16381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:14.214751', 'step': 16381, 'epoch': 3} {'type': 'loss', 'content': 0.11539853364229202, 'timestamp': '2025-09-10 02:56:14.217014', 'step': 16382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:14.270934', 'step': 16382, 'epoch': 3} {'type': 'loss', 'content': 0.1511913686990738, 'timestamp': '2025-09-10 02:56:14.273205', 'step': 16383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:14.327824', 'step': 16383, 'epoch': 3} {'type': 'loss', 'content': 0.13296417891979218, 'timestamp': '2025-09-10 02:56:14.333958', 'step': 16384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:14.386908', 'step': 16384, 'epoch': 3} {'type': 'loss', 'content': 0.09813882410526276, 'timestamp': '2025-09-10 02:56:14.389166', 'step': 16385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:14.442714', 'step': 16385, 'epoch': 3} {'type': 'loss', 'content': 0.10639405995607376, 'timestamp': '2025-09-10 02:56:14.444904', 'step': 16386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:14.498703', 'step': 16386, 'epoch': 3} {'type': 'loss', 'content': 0.06942908465862274, 'timestamp': '2025-09-10 02:56:14.500950', 'step': 16387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:14.555327', 'step': 16387, 'epoch': 3} {'type': 'loss', 'content': 0.19691836833953857, 'timestamp': '2025-09-10 02:56:14.561512', 'step': 16388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:14.615445', 'step': 16388, 'epoch': 3} {'type': 'loss', 'content': 0.207749605178833, 'timestamp': '2025-09-10 02:56:14.617547', 'step': 16389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:14.670924', 'step': 16389, 'epoch': 3} {'type': 'loss', 'content': 0.12916867434978485, 'timestamp': '2025-09-10 02:56:14.673133', 'step': 16390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:14.726331', 'step': 16390, 'epoch': 3} {'type': 'loss', 'content': 0.062140174210071564, 'timestamp': '2025-09-10 02:56:14.728566', 'step': 16391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:14.783617', 'step': 16391, 'epoch': 3} {'type': 'loss', 'content': 0.06866353005170822, 'timestamp': '2025-09-10 02:56:14.789999', 'step': 16392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:14.845881', 'step': 16392, 'epoch': 3} {'type': 'loss', 'content': 0.06264949589967728, 'timestamp': '2025-09-10 02:56:14.848288', 'step': 16393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:14.905239', 'step': 16393, 'epoch': 3} {'type': 'loss', 'content': 0.07850681990385056, 'timestamp': '2025-09-10 02:56:14.907660', 'step': 16394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:14.965329', 'step': 16394, 'epoch': 3} {'type': 'loss', 'content': 0.08825615048408508, 'timestamp': '2025-09-10 02:56:14.967804', 'step': 16395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:15.025355', 'step': 16395, 'epoch': 3} {'type': 'loss', 'content': 0.08045835793018341, 'timestamp': '2025-09-10 02:56:15.031850', 'step': 16396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:15.088921', 'step': 16396, 'epoch': 3} {'type': 'loss', 'content': 0.1544228047132492, 'timestamp': '2025-09-10 02:56:15.091342', 'step': 16397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:15.150355', 'step': 16397, 'epoch': 3} {'type': 'loss', 'content': 0.041386112570762634, 'timestamp': '2025-09-10 02:56:15.152677', 'step': 16398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:15.209954', 'step': 16398, 'epoch': 3} {'type': 'loss', 'content': 0.11618722975254059, 'timestamp': '2025-09-10 02:56:15.212292', 'step': 16399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:15.270048', 'step': 16399, 'epoch': 3} {'type': 'loss', 'content': 0.12947817146778107, 'timestamp': '2025-09-10 02:56:15.276699', 'step': 16400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:15.330713', 'step': 16400, 'epoch': 3} {'type': 'loss', 'content': 0.10092633962631226, 'timestamp': '2025-09-10 02:56:15.332934', 'step': 16401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:15.388518', 'step': 16401, 'epoch': 3} {'type': 'loss', 'content': 0.07209271937608719, 'timestamp': '2025-09-10 02:56:15.390693', 'step': 16402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:15.444285', 'step': 16402, 'epoch': 3} {'type': 'loss', 'content': 0.07753541320562363, 'timestamp': '2025-09-10 02:56:15.446575', 'step': 16403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:15.501272', 'step': 16403, 'epoch': 3} {'type': 'loss', 'content': 0.0750715434551239, 'timestamp': '2025-09-10 02:56:15.507550', 'step': 16404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:15.561055', 'step': 16404, 'epoch': 3} {'type': 'loss', 'content': 0.09621939808130264, 'timestamp': '2025-09-10 02:56:15.563331', 'step': 16405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:15.617061', 'step': 16405, 'epoch': 3} {'type': 'loss', 'content': 0.06434055417776108, 'timestamp': '2025-09-10 02:56:15.619395', 'step': 16406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:15.674853', 'step': 16406, 'epoch': 3} {'type': 'loss', 'content': 0.058941952884197235, 'timestamp': '2025-09-10 02:56:15.677292', 'step': 16407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:15.737808', 'step': 16407, 'epoch': 3} {'type': 'loss', 'content': 0.12431467324495316, 'timestamp': '2025-09-10 02:56:15.745288', 'step': 16408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:15.805813', 'step': 16408, 'epoch': 3} {'type': 'loss', 'content': 0.12208603322505951, 'timestamp': '2025-09-10 02:56:15.808429', 'step': 16409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:15.869035', 'step': 16409, 'epoch': 3} {'type': 'loss', 'content': 0.11686661839485168, 'timestamp': '2025-09-10 02:56:15.871740', 'step': 16410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:15.932516', 'step': 16410, 'epoch': 3} {'type': 'loss', 'content': 0.09216533601284027, 'timestamp': '2025-09-10 02:56:15.934946', 'step': 16411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:15.994858', 'step': 16411, 'epoch': 3} {'type': 'loss', 'content': 0.0544310137629509, 'timestamp': '2025-09-10 02:56:16.001937', 'step': 16412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:16.058274', 'step': 16412, 'epoch': 3} {'type': 'loss', 'content': 0.14889445900917053, 'timestamp': '2025-09-10 02:56:16.060416', 'step': 16413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:16.114782', 'step': 16413, 'epoch': 3} {'type': 'loss', 'content': 0.05315409228205681, 'timestamp': '2025-09-10 02:56:16.116851', 'step': 16414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:16.170610', 'step': 16414, 'epoch': 3} {'type': 'loss', 'content': 0.08412893116474152, 'timestamp': '2025-09-10 02:56:16.172981', 'step': 16415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:16.227324', 'step': 16415, 'epoch': 3} {'type': 'loss', 'content': 0.20391541719436646, 'timestamp': '2025-09-10 02:56:16.233629', 'step': 16416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:16.286565', 'step': 16416, 'epoch': 3} {'type': 'loss', 'content': 0.0864381343126297, 'timestamp': '2025-09-10 02:56:16.288744', 'step': 16417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:16.342082', 'step': 16417, 'epoch': 3} {'type': 'loss', 'content': 0.07315842062234879, 'timestamp': '2025-09-10 02:56:16.344385', 'step': 16418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:56:16.397908', 'step': 16418, 'epoch': 3} {'type': 'loss', 'content': 0.040211740881204605, 'timestamp': '2025-09-10 02:56:16.400155', 'step': 16419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:16.454634', 'step': 16419, 'epoch': 3} {'type': 'loss', 'content': 0.09363778680562973, 'timestamp': '2025-09-10 02:56:16.460494', 'step': 16420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:16.514336', 'step': 16420, 'epoch': 3} {'type': 'loss', 'content': 0.15713649988174438, 'timestamp': '2025-09-10 02:56:16.516574', 'step': 16421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:16.580754', 'step': 16421, 'epoch': 3} {'type': 'loss', 'content': 0.15360960364341736, 'timestamp': '2025-09-10 02:56:16.583026', 'step': 16422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:16.637207', 'step': 16422, 'epoch': 3} {'type': 'loss', 'content': 0.10890693962574005, 'timestamp': '2025-09-10 02:56:16.639620', 'step': 16423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:16.704226', 'step': 16423, 'epoch': 3} {'type': 'loss', 'content': 0.05879826843738556, 'timestamp': '2025-09-10 02:56:16.710410', 'step': 16424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:16.766407', 'step': 16424, 'epoch': 3} {'type': 'loss', 'content': 0.12085223942995071, 'timestamp': '2025-09-10 02:56:16.768506', 'step': 16425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:16.821921', 'step': 16425, 'epoch': 3} {'type': 'loss', 'content': 0.14044973254203796, 'timestamp': '2025-09-10 02:56:16.824092', 'step': 16426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:16.877440', 'step': 16426, 'epoch': 3} {'type': 'loss', 'content': 0.022533709183335304, 'timestamp': '2025-09-10 02:56:16.879767', 'step': 16427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:16.933188', 'step': 16427, 'epoch': 3} {'type': 'loss', 'content': 0.08742420375347137, 'timestamp': '2025-09-10 02:56:16.939195', 'step': 16428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:16.991784', 'step': 16428, 'epoch': 3} {'type': 'loss', 'content': 0.13029736280441284, 'timestamp': '2025-09-10 02:56:16.994055', 'step': 16429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:17.047686', 'step': 16429, 'epoch': 3} {'type': 'loss', 'content': 0.07179789990186691, 'timestamp': '2025-09-10 02:56:17.050016', 'step': 16430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:56:17.104259', 'step': 16430, 'epoch': 3} {'type': 'loss', 'content': 0.04895171895623207, 'timestamp': '2025-09-10 02:56:17.106533', 'step': 16431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:17.162000', 'step': 16431, 'epoch': 3} {'type': 'loss', 'content': 0.13193485140800476, 'timestamp': '2025-09-10 02:56:17.169390', 'step': 16432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:17.223416', 'step': 16432, 'epoch': 3} {'type': 'loss', 'content': 0.11000952124595642, 'timestamp': '2025-09-10 02:56:17.225802', 'step': 16433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:17.280215', 'step': 16433, 'epoch': 3} {'type': 'loss', 'content': 0.22248202562332153, 'timestamp': '2025-09-10 02:56:17.282445', 'step': 16434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:56:17.335876', 'step': 16434, 'epoch': 3} {'type': 'loss', 'content': 0.06402464956045151, 'timestamp': '2025-09-10 02:56:17.338062', 'step': 16435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:17.391987', 'step': 16435, 'epoch': 3} {'type': 'loss', 'content': 0.13545314967632294, 'timestamp': '2025-09-10 02:56:17.397910', 'step': 16436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:17.451347', 'step': 16436, 'epoch': 3} {'type': 'loss', 'content': 0.06815293431282043, 'timestamp': '2025-09-10 02:56:17.453891', 'step': 16437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:17.507256', 'step': 16437, 'epoch': 3} {'type': 'loss', 'content': 0.1102522611618042, 'timestamp': '2025-09-10 02:56:17.509699', 'step': 16438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:17.563414', 'step': 16438, 'epoch': 3} {'type': 'loss', 'content': 0.17918065190315247, 'timestamp': '2025-09-10 02:56:17.565765', 'step': 16439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:17.618980', 'step': 16439, 'epoch': 3} {'type': 'loss', 'content': 0.04735558480024338, 'timestamp': '2025-09-10 02:56:17.624941', 'step': 16440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:17.678452', 'step': 16440, 'epoch': 3} {'type': 'loss', 'content': 0.014626200310885906, 'timestamp': '2025-09-10 02:56:17.680653', 'step': 16441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:17.733720', 'step': 16441, 'epoch': 3} {'type': 'loss', 'content': 0.06398994475603104, 'timestamp': '2025-09-10 02:56:17.735822', 'step': 16442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:17.790563', 'step': 16442, 'epoch': 3} {'type': 'loss', 'content': 0.07187316566705704, 'timestamp': '2025-09-10 02:56:17.792767', 'step': 16443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:17.847026', 'step': 16443, 'epoch': 3} {'type': 'loss', 'content': 0.10552586615085602, 'timestamp': '2025-09-10 02:56:17.852943', 'step': 16444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:17.906383', 'step': 16444, 'epoch': 3} {'type': 'loss', 'content': 0.13203927874565125, 'timestamp': '2025-09-10 02:56:17.908710', 'step': 16445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:17.962207', 'step': 16445, 'epoch': 3} {'type': 'loss', 'content': 0.15029241144657135, 'timestamp': '2025-09-10 02:56:17.964737', 'step': 16446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:18.021385', 'step': 16446, 'epoch': 3} {'type': 'loss', 'content': 0.11323335766792297, 'timestamp': '2025-09-10 02:56:18.023693', 'step': 16447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:18.080780', 'step': 16447, 'epoch': 3} {'type': 'loss', 'content': 0.07961323112249374, 'timestamp': '2025-09-10 02:56:18.087261', 'step': 16448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:18.142102', 'step': 16448, 'epoch': 3} {'type': 'loss', 'content': 0.08221983164548874, 'timestamp': '2025-09-10 02:56:18.144399', 'step': 16449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:18.199880', 'step': 16449, 'epoch': 3} {'type': 'loss', 'content': 0.05467061698436737, 'timestamp': '2025-09-10 02:56:18.202196', 'step': 16450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:18.256301', 'step': 16450, 'epoch': 3} {'type': 'loss', 'content': 0.08596140891313553, 'timestamp': '2025-09-10 02:56:18.258775', 'step': 16451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:18.312915', 'step': 16451, 'epoch': 3} {'type': 'loss', 'content': 0.07546129822731018, 'timestamp': '2025-09-10 02:56:18.319353', 'step': 16452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:18.374130', 'step': 16452, 'epoch': 3} {'type': 'loss', 'content': 0.08874901384115219, 'timestamp': '2025-09-10 02:56:18.376510', 'step': 16453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:18.430442', 'step': 16453, 'epoch': 3} {'type': 'loss', 'content': 0.06789682060480118, 'timestamp': '2025-09-10 02:56:18.432781', 'step': 16454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:18.489245', 'step': 16454, 'epoch': 3} {'type': 'loss', 'content': 0.14953377842903137, 'timestamp': '2025-09-10 02:56:18.491503', 'step': 16455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:18.547624', 'step': 16455, 'epoch': 3} {'type': 'loss', 'content': 0.055270493030548096, 'timestamp': '2025-09-10 02:56:18.554182', 'step': 16456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:18.612916', 'step': 16456, 'epoch': 3} {'type': 'loss', 'content': 0.06788050383329391, 'timestamp': '2025-09-10 02:56:18.617107', 'step': 16457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:18.672380', 'step': 16457, 'epoch': 3} {'type': 'loss', 'content': 0.18926899135112762, 'timestamp': '2025-09-10 02:56:18.676629', 'step': 16458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:18.731666', 'step': 16458, 'epoch': 3} {'type': 'loss', 'content': 0.07059582322835922, 'timestamp': '2025-09-10 02:56:18.737154', 'step': 16459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:18.797048', 'step': 16459, 'epoch': 3} {'type': 'loss', 'content': 0.0660741925239563, 'timestamp': '2025-09-10 02:56:18.804025', 'step': 16460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:18.862150', 'step': 16460, 'epoch': 3} {'type': 'loss', 'content': 0.1556771695613861, 'timestamp': '2025-09-10 02:56:18.864417', 'step': 16461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:18.920620', 'step': 16461, 'epoch': 3} {'type': 'loss', 'content': 0.12372394651174545, 'timestamp': '2025-09-10 02:56:18.922697', 'step': 16462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:18.979726', 'step': 16462, 'epoch': 3} {'type': 'loss', 'content': 0.05357592552900314, 'timestamp': '2025-09-10 02:56:18.983991', 'step': 16463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:19.040741', 'step': 16463, 'epoch': 3} {'type': 'loss', 'content': 0.05526527762413025, 'timestamp': '2025-09-10 02:56:19.047467', 'step': 16464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:19.102759', 'step': 16464, 'epoch': 3} {'type': 'loss', 'content': 0.185783252120018, 'timestamp': '2025-09-10 02:56:19.105111', 'step': 16465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:19.161876', 'step': 16465, 'epoch': 3} {'type': 'loss', 'content': 0.04469233751296997, 'timestamp': '2025-09-10 02:56:19.164434', 'step': 16466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:19.221612', 'step': 16466, 'epoch': 3} {'type': 'loss', 'content': 0.07608020305633545, 'timestamp': '2025-09-10 02:56:19.223994', 'step': 16467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:19.280139', 'step': 16467, 'epoch': 3} {'type': 'loss', 'content': 0.08943739533424377, 'timestamp': '2025-09-10 02:56:19.288725', 'step': 16468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:19.350740', 'step': 16468, 'epoch': 3} {'type': 'loss', 'content': 0.120395228266716, 'timestamp': '2025-09-10 02:56:19.353183', 'step': 16469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:19.410913', 'step': 16469, 'epoch': 3} {'type': 'loss', 'content': 0.0953124612569809, 'timestamp': '2025-09-10 02:56:19.415385', 'step': 16470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:19.474973', 'step': 16470, 'epoch': 3} {'type': 'loss', 'content': 0.07742255926132202, 'timestamp': '2025-09-10 02:56:19.484624', 'step': 16471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:19.546259', 'step': 16471, 'epoch': 3} {'type': 'loss', 'content': 0.03936361148953438, 'timestamp': '2025-09-10 02:56:19.553183', 'step': 16472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:19.608906', 'step': 16472, 'epoch': 3} {'type': 'loss', 'content': 0.201186865568161, 'timestamp': '2025-09-10 02:56:19.611235', 'step': 16473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:19.665934', 'step': 16473, 'epoch': 3} {'type': 'loss', 'content': 0.07621277868747711, 'timestamp': '2025-09-10 02:56:19.668330', 'step': 16474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:19.721965', 'step': 16474, 'epoch': 3} {'type': 'loss', 'content': 0.141197070479393, 'timestamp': '2025-09-10 02:56:19.724448', 'step': 16475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:19.779174', 'step': 16475, 'epoch': 3} {'type': 'loss', 'content': 0.1451452523469925, 'timestamp': '2025-09-10 02:56:19.787229', 'step': 16476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:19.845649', 'step': 16476, 'epoch': 3} {'type': 'loss', 'content': 0.10729194432497025, 'timestamp': '2025-09-10 02:56:19.847862', 'step': 16477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:19.901965', 'step': 16477, 'epoch': 3} {'type': 'loss', 'content': 0.0639890730381012, 'timestamp': '2025-09-10 02:56:19.904119', 'step': 16478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:19.963518', 'step': 16478, 'epoch': 3} {'type': 'loss', 'content': 0.08501015603542328, 'timestamp': '2025-09-10 02:56:19.965943', 'step': 16479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:20.020582', 'step': 16479, 'epoch': 3} {'type': 'loss', 'content': 0.13425536453723907, 'timestamp': '2025-09-10 02:56:20.027463', 'step': 16480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:20.083514', 'step': 16480, 'epoch': 3} {'type': 'loss', 'content': 0.06414065510034561, 'timestamp': '2025-09-10 02:56:20.085791', 'step': 16481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:20.145872', 'step': 16481, 'epoch': 3} {'type': 'loss', 'content': 0.05370331183075905, 'timestamp': '2025-09-10 02:56:20.148209', 'step': 16482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:20.208085', 'step': 16482, 'epoch': 3} {'type': 'loss', 'content': 0.09870650619268417, 'timestamp': '2025-09-10 02:56:20.210582', 'step': 16483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:20.266796', 'step': 16483, 'epoch': 3} {'type': 'loss', 'content': 0.042607132345438004, 'timestamp': '2025-09-10 02:56:20.273855', 'step': 16484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:20.328995', 'step': 16484, 'epoch': 3} {'type': 'loss', 'content': 0.08533212542533875, 'timestamp': '2025-09-10 02:56:20.334049', 'step': 16485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:20.390028', 'step': 16485, 'epoch': 3} {'type': 'loss', 'content': 0.11913547664880753, 'timestamp': '2025-09-10 02:56:20.392870', 'step': 16486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:20.446976', 'step': 16486, 'epoch': 3} {'type': 'loss', 'content': 0.06485160440206528, 'timestamp': '2025-09-10 02:56:20.450501', 'step': 16487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:20.505619', 'step': 16487, 'epoch': 3} {'type': 'loss', 'content': 0.0699983760714531, 'timestamp': '2025-09-10 02:56:20.512099', 'step': 16488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:20.570311', 'step': 16488, 'epoch': 3} {'type': 'loss', 'content': 0.1184876337647438, 'timestamp': '2025-09-10 02:56:20.574316', 'step': 16489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:20.631946', 'step': 16489, 'epoch': 3} {'type': 'loss', 'content': 0.13506054878234863, 'timestamp': '2025-09-10 02:56:20.634227', 'step': 16490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:20.688065', 'step': 16490, 'epoch': 3} {'type': 'loss', 'content': 0.09026779979467392, 'timestamp': '2025-09-10 02:56:20.692856', 'step': 16491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:20.753652', 'step': 16491, 'epoch': 3} {'type': 'loss', 'content': 0.08980149030685425, 'timestamp': '2025-09-10 02:56:20.759598', 'step': 16492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:20.813272', 'step': 16492, 'epoch': 3} {'type': 'loss', 'content': 0.06797539442777634, 'timestamp': '2025-09-10 02:56:20.815300', 'step': 16493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:56:20.872793', 'step': 16493, 'epoch': 3} {'type': 'loss', 'content': 0.07286585122346878, 'timestamp': '2025-09-10 02:56:20.877244', 'step': 16494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:20.932809', 'step': 16494, 'epoch': 3} {'type': 'loss', 'content': 0.1321672797203064, 'timestamp': '2025-09-10 02:56:20.935348', 'step': 16495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:20.999352', 'step': 16495, 'epoch': 3} {'type': 'loss', 'content': 0.13890224695205688, 'timestamp': '2025-09-10 02:56:21.005562', 'step': 16496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:21.070391', 'step': 16496, 'epoch': 3} {'type': 'loss', 'content': 0.11404246091842651, 'timestamp': '2025-09-10 02:56:21.072647', 'step': 16497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:21.126140', 'step': 16497, 'epoch': 3} {'type': 'loss', 'content': 0.11792689561843872, 'timestamp': '2025-09-10 02:56:21.128402', 'step': 16498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:21.181952', 'step': 16498, 'epoch': 3} {'type': 'loss', 'content': 0.07316916435956955, 'timestamp': '2025-09-10 02:56:21.185615', 'step': 16499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:21.240193', 'step': 16499, 'epoch': 3} {'type': 'loss', 'content': 0.09502401947975159, 'timestamp': '2025-09-10 02:56:21.246249', 'step': 16500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 16500', 'timestamp': '2025-09-10 02:56:21.626302', 'step': 16500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:56:21.686262', 'step': 16500, 'epoch': 3} {'type': 'loss', 'content': 0.08573473244905472, 'timestamp': '2025-09-10 02:56:21.688549', 'step': 16501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:21.742861', 'step': 16501, 'epoch': 3} {'type': 'loss', 'content': 0.15268796682357788, 'timestamp': '2025-09-10 02:56:21.745152', 'step': 16502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:21.800726', 'step': 16502, 'epoch': 3} {'type': 'loss', 'content': 0.12035214155912399, 'timestamp': '2025-09-10 02:56:21.802796', 'step': 16503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:21.855531', 'step': 16503, 'epoch': 3} {'type': 'loss', 'content': 0.0881117433309555, 'timestamp': '2025-09-10 02:56:21.861494', 'step': 16504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:21.915047', 'step': 16504, 'epoch': 3} {'type': 'loss', 'content': 0.14164511859416962, 'timestamp': '2025-09-10 02:56:21.917301', 'step': 16505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:21.970962', 'step': 16505, 'epoch': 3} {'type': 'loss', 'content': 0.09504356980323792, 'timestamp': '2025-09-10 02:56:21.973564', 'step': 16506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:22.027765', 'step': 16506, 'epoch': 3} {'type': 'loss', 'content': 0.14416156709194183, 'timestamp': '2025-09-10 02:56:22.030135', 'step': 16507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:22.084026', 'step': 16507, 'epoch': 3} {'type': 'loss', 'content': 0.06303396821022034, 'timestamp': '2025-09-10 02:56:22.090395', 'step': 16508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:22.143863', 'step': 16508, 'epoch': 3} {'type': 'loss', 'content': 0.07856447249650955, 'timestamp': '2025-09-10 02:56:22.146349', 'step': 16509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:22.199711', 'step': 16509, 'epoch': 3} {'type': 'loss', 'content': 0.162063330411911, 'timestamp': '2025-09-10 02:56:22.202162', 'step': 16510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:22.255740', 'step': 16510, 'epoch': 3} {'type': 'loss', 'content': 0.10669673979282379, 'timestamp': '2025-09-10 02:56:22.258075', 'step': 16511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:22.312332', 'step': 16511, 'epoch': 3} {'type': 'loss', 'content': 0.13920839130878448, 'timestamp': '2025-09-10 02:56:22.318503', 'step': 16512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:22.372569', 'step': 16512, 'epoch': 3} {'type': 'loss', 'content': 0.10367651283740997, 'timestamp': '2025-09-10 02:56:22.374783', 'step': 16513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:22.429310', 'step': 16513, 'epoch': 3} {'type': 'loss', 'content': 0.14562661945819855, 'timestamp': '2025-09-10 02:56:22.431510', 'step': 16514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:22.486391', 'step': 16514, 'epoch': 3} {'type': 'loss', 'content': 0.07326648384332657, 'timestamp': '2025-09-10 02:56:22.488613', 'step': 16515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:22.543368', 'step': 16515, 'epoch': 3} {'type': 'loss', 'content': 0.10530877858400345, 'timestamp': '2025-09-10 02:56:22.549439', 'step': 16516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:22.603436', 'step': 16516, 'epoch': 3} {'type': 'loss', 'content': 0.12321044504642487, 'timestamp': '2025-09-10 02:56:22.605773', 'step': 16517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:22.660242', 'step': 16517, 'epoch': 3} {'type': 'loss', 'content': 0.07339759171009064, 'timestamp': '2025-09-10 02:56:22.662444', 'step': 16518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:22.718916', 'step': 16518, 'epoch': 3} {'type': 'loss', 'content': 0.11191300302743912, 'timestamp': '2025-09-10 02:56:22.721109', 'step': 16519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:22.775063', 'step': 16519, 'epoch': 3} {'type': 'loss', 'content': 0.1157599687576294, 'timestamp': '2025-09-10 02:56:22.781122', 'step': 16520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:22.834236', 'step': 16520, 'epoch': 3} {'type': 'loss', 'content': 0.07484287023544312, 'timestamp': '2025-09-10 02:56:22.836631', 'step': 16521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:22.890627', 'step': 16521, 'epoch': 3} {'type': 'loss', 'content': 0.12707985937595367, 'timestamp': '2025-09-10 02:56:22.893018', 'step': 16522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:56:22.949159', 'step': 16522, 'epoch': 3} {'type': 'loss', 'content': 0.10037045925855637, 'timestamp': '2025-09-10 02:56:22.951575', 'step': 16523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:23.004981', 'step': 16523, 'epoch': 3} {'type': 'loss', 'content': 0.1390787661075592, 'timestamp': '2025-09-10 02:56:23.011190', 'step': 16524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:23.064466', 'step': 16524, 'epoch': 3} {'type': 'loss', 'content': 0.04776580631732941, 'timestamp': '2025-09-10 02:56:23.066705', 'step': 16525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:23.120980', 'step': 16525, 'epoch': 3} {'type': 'loss', 'content': 0.05905194953083992, 'timestamp': '2025-09-10 02:56:23.123320', 'step': 16526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:23.179430', 'step': 16526, 'epoch': 3} {'type': 'loss', 'content': 0.04690079391002655, 'timestamp': '2025-09-10 02:56:23.181743', 'step': 16527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:23.237108', 'step': 16527, 'epoch': 3} {'type': 'loss', 'content': 0.1232772022485733, 'timestamp': '2025-09-10 02:56:23.243472', 'step': 16528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:23.297167', 'step': 16528, 'epoch': 3} {'type': 'loss', 'content': 0.07592179626226425, 'timestamp': '2025-09-10 02:56:23.299291', 'step': 16529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:23.353359', 'step': 16529, 'epoch': 3} {'type': 'loss', 'content': 0.07432332634925842, 'timestamp': '2025-09-10 02:56:23.355673', 'step': 16530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:23.409847', 'step': 16530, 'epoch': 3} {'type': 'loss', 'content': 0.1375558078289032, 'timestamp': '2025-09-10 02:56:23.412125', 'step': 16531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:23.466290', 'step': 16531, 'epoch': 3} {'type': 'loss', 'content': 0.07374781370162964, 'timestamp': '2025-09-10 02:56:23.472360', 'step': 16532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:23.526512', 'step': 16532, 'epoch': 3} {'type': 'loss', 'content': 0.0798782929778099, 'timestamp': '2025-09-10 02:56:23.528889', 'step': 16533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:23.582620', 'step': 16533, 'epoch': 3} {'type': 'loss', 'content': 0.062729611992836, 'timestamp': '2025-09-10 02:56:23.584584', 'step': 16534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:23.638879', 'step': 16534, 'epoch': 3} {'type': 'loss', 'content': 0.21465381979942322, 'timestamp': '2025-09-10 02:56:23.641194', 'step': 16535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:23.695619', 'step': 16535, 'epoch': 3} {'type': 'loss', 'content': 0.0774647668004036, 'timestamp': '2025-09-10 02:56:23.701759', 'step': 16536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:23.755517', 'step': 16536, 'epoch': 3} {'type': 'loss', 'content': 0.11191845685243607, 'timestamp': '2025-09-10 02:56:23.758058', 'step': 16537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:23.812396', 'step': 16537, 'epoch': 3} {'type': 'loss', 'content': 0.11790869385004044, 'timestamp': '2025-09-10 02:56:23.814890', 'step': 16538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:23.869373', 'step': 16538, 'epoch': 3} {'type': 'loss', 'content': 0.07652134448289871, 'timestamp': '2025-09-10 02:56:23.871675', 'step': 16539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:23.925331', 'step': 16539, 'epoch': 3} {'type': 'loss', 'content': 0.07451245933771133, 'timestamp': '2025-09-10 02:56:23.931336', 'step': 16540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:23.984534', 'step': 16540, 'epoch': 3} {'type': 'loss', 'content': 0.13235723972320557, 'timestamp': '2025-09-10 02:56:23.986896', 'step': 16541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:24.040137', 'step': 16541, 'epoch': 3} {'type': 'loss', 'content': 0.08889704942703247, 'timestamp': '2025-09-10 02:56:24.042342', 'step': 16542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:24.096712', 'step': 16542, 'epoch': 3} {'type': 'loss', 'content': 0.11869945377111435, 'timestamp': '2025-09-10 02:56:24.098975', 'step': 16543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:24.152554', 'step': 16543, 'epoch': 3} {'type': 'loss', 'content': 0.12484323233366013, 'timestamp': '2025-09-10 02:56:24.158517', 'step': 16544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:24.212803', 'step': 16544, 'epoch': 3} {'type': 'loss', 'content': 0.052006784826517105, 'timestamp': '2025-09-10 02:56:24.215026', 'step': 16545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:24.268946', 'step': 16545, 'epoch': 3} {'type': 'loss', 'content': 0.07111606746912003, 'timestamp': '2025-09-10 02:56:24.271145', 'step': 16546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:24.326959', 'step': 16546, 'epoch': 3} {'type': 'loss', 'content': 0.07684583216905594, 'timestamp': '2025-09-10 02:56:24.329213', 'step': 16547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:24.383003', 'step': 16547, 'epoch': 3} {'type': 'loss', 'content': 0.07513140141963959, 'timestamp': '2025-09-10 02:56:24.389219', 'step': 16548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:24.442904', 'step': 16548, 'epoch': 3} {'type': 'loss', 'content': 0.0877542793750763, 'timestamp': '2025-09-10 02:56:24.445181', 'step': 16549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:56:24.498826', 'step': 16549, 'epoch': 3} {'type': 'loss', 'content': 0.14179427921772003, 'timestamp': '2025-09-10 02:56:24.501073', 'step': 16550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:24.554969', 'step': 16550, 'epoch': 3} {'type': 'loss', 'content': 0.06357229501008987, 'timestamp': '2025-09-10 02:56:24.557404', 'step': 16551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:24.611145', 'step': 16551, 'epoch': 3} {'type': 'loss', 'content': 0.12021906673908234, 'timestamp': '2025-09-10 02:56:24.617491', 'step': 16552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:24.671457', 'step': 16552, 'epoch': 3} {'type': 'loss', 'content': 0.0921369418501854, 'timestamp': '2025-09-10 02:56:24.673902', 'step': 16553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:24.730297', 'step': 16553, 'epoch': 3} {'type': 'loss', 'content': 0.15948337316513062, 'timestamp': '2025-09-10 02:56:24.732531', 'step': 16554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:24.787947', 'step': 16554, 'epoch': 3} {'type': 'loss', 'content': 0.09594736248254776, 'timestamp': '2025-09-10 02:56:24.790208', 'step': 16555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:24.844092', 'step': 16555, 'epoch': 3} {'type': 'loss', 'content': 0.08688726276159286, 'timestamp': '2025-09-10 02:56:24.850238', 'step': 16556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:24.903648', 'step': 16556, 'epoch': 3} {'type': 'loss', 'content': 0.14767023921012878, 'timestamp': '2025-09-10 02:56:24.905891', 'step': 16557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:24.959692', 'step': 16557, 'epoch': 3} {'type': 'loss', 'content': 0.1453370898962021, 'timestamp': '2025-09-10 02:56:24.962021', 'step': 16558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:25.015931', 'step': 16558, 'epoch': 3} {'type': 'loss', 'content': 0.13520582020282745, 'timestamp': '2025-09-10 02:56:25.018419', 'step': 16559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:25.073694', 'step': 16559, 'epoch': 3} {'type': 'loss', 'content': 0.15234652161598206, 'timestamp': '2025-09-10 02:56:25.079687', 'step': 16560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:25.133356', 'step': 16560, 'epoch': 3} {'type': 'loss', 'content': 0.12427029013633728, 'timestamp': '2025-09-10 02:56:25.135769', 'step': 16561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:25.189504', 'step': 16561, 'epoch': 3} {'type': 'loss', 'content': 0.08683555573225021, 'timestamp': '2025-09-10 02:56:25.191889', 'step': 16562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:25.246337', 'step': 16562, 'epoch': 3} {'type': 'loss', 'content': 0.1026354730129242, 'timestamp': '2025-09-10 02:56:25.248706', 'step': 16563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:25.303291', 'step': 16563, 'epoch': 3} {'type': 'loss', 'content': 0.17839939892292023, 'timestamp': '2025-09-10 02:56:25.309361', 'step': 16564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:25.362906', 'step': 16564, 'epoch': 3} {'type': 'loss', 'content': 0.05736770108342171, 'timestamp': '2025-09-10 02:56:25.365116', 'step': 16565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:25.419803', 'step': 16565, 'epoch': 3} {'type': 'loss', 'content': 0.03768112137913704, 'timestamp': '2025-09-10 02:56:25.422341', 'step': 16566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:25.481150', 'step': 16566, 'epoch': 3} {'type': 'loss', 'content': 0.04721102491021156, 'timestamp': '2025-09-10 02:56:25.483809', 'step': 16567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:25.537606', 'step': 16567, 'epoch': 3} {'type': 'loss', 'content': 0.06455755978822708, 'timestamp': '2025-09-10 02:56:25.543643', 'step': 16568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:25.604434', 'step': 16568, 'epoch': 3} {'type': 'loss', 'content': 0.09642311930656433, 'timestamp': '2025-09-10 02:56:25.606693', 'step': 16569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:25.668186', 'step': 16569, 'epoch': 3} {'type': 'loss', 'content': 0.1259993612766266, 'timestamp': '2025-09-10 02:56:25.670405', 'step': 16570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:25.732085', 'step': 16570, 'epoch': 3} {'type': 'loss', 'content': 0.07505057752132416, 'timestamp': '2025-09-10 02:56:25.734482', 'step': 16571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:25.788234', 'step': 16571, 'epoch': 3} {'type': 'loss', 'content': 0.038739629089832306, 'timestamp': '2025-09-10 02:56:25.794131', 'step': 16572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:25.847342', 'step': 16572, 'epoch': 3} {'type': 'loss', 'content': 0.14454640448093414, 'timestamp': '2025-09-10 02:56:25.849572', 'step': 16573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:25.904771', 'step': 16573, 'epoch': 3} {'type': 'loss', 'content': 0.07840122282505035, 'timestamp': '2025-09-10 02:56:25.906983', 'step': 16574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:25.961116', 'step': 16574, 'epoch': 3} {'type': 'loss', 'content': 0.1495169848203659, 'timestamp': '2025-09-10 02:56:25.963350', 'step': 16575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:26.016081', 'step': 16575, 'epoch': 3} {'type': 'loss', 'content': 0.0868183821439743, 'timestamp': '2025-09-10 02:56:26.022085', 'step': 16576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:26.075665', 'step': 16576, 'epoch': 3} {'type': 'loss', 'content': 0.07026435434818268, 'timestamp': '2025-09-10 02:56:26.078309', 'step': 16577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:26.131769', 'step': 16577, 'epoch': 3} {'type': 'loss', 'content': 0.06384628266096115, 'timestamp': '2025-09-10 02:56:26.134310', 'step': 16578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:26.188784', 'step': 16578, 'epoch': 3} {'type': 'loss', 'content': 0.13464653491973877, 'timestamp': '2025-09-10 02:56:26.190945', 'step': 16579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:26.244672', 'step': 16579, 'epoch': 3} {'type': 'loss', 'content': 0.110979825258255, 'timestamp': '2025-09-10 02:56:26.251179', 'step': 16580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:56:26.305245', 'step': 16580, 'epoch': 3} {'type': 'loss', 'content': 0.0479302704334259, 'timestamp': '2025-09-10 02:56:26.307639', 'step': 16581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:26.364374', 'step': 16581, 'epoch': 3} {'type': 'loss', 'content': 0.08978768438100815, 'timestamp': '2025-09-10 02:56:26.366650', 'step': 16582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:26.421399', 'step': 16582, 'epoch': 3} {'type': 'loss', 'content': 0.06272479146718979, 'timestamp': '2025-09-10 02:56:26.423705', 'step': 16583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:26.480249', 'step': 16583, 'epoch': 3} {'type': 'loss', 'content': 0.10293988138437271, 'timestamp': '2025-09-10 02:56:26.486385', 'step': 16584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:26.539376', 'step': 16584, 'epoch': 3} {'type': 'loss', 'content': 0.08825311064720154, 'timestamp': '2025-09-10 02:56:26.541702', 'step': 16585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:26.595872', 'step': 16585, 'epoch': 3} {'type': 'loss', 'content': 0.06571218371391296, 'timestamp': '2025-09-10 02:56:26.598129', 'step': 16586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:26.652547', 'step': 16586, 'epoch': 3} {'type': 'loss', 'content': 0.1183469220995903, 'timestamp': '2025-09-10 02:56:26.654906', 'step': 16587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:26.708503', 'step': 16587, 'epoch': 3} {'type': 'loss', 'content': 0.11556733399629593, 'timestamp': '2025-09-10 02:56:26.714461', 'step': 16588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:26.767871', 'step': 16588, 'epoch': 3} {'type': 'loss', 'content': 0.04044313728809357, 'timestamp': '2025-09-10 02:56:26.770315', 'step': 16589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:26.823708', 'step': 16589, 'epoch': 3} {'type': 'loss', 'content': 0.08319568634033203, 'timestamp': '2025-09-10 02:56:26.825957', 'step': 16590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:26.882892', 'step': 16590, 'epoch': 3} {'type': 'loss', 'content': 0.09608907252550125, 'timestamp': '2025-09-10 02:56:26.884894', 'step': 16591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:26.943483', 'step': 16591, 'epoch': 3} {'type': 'loss', 'content': 0.1021232008934021, 'timestamp': '2025-09-10 02:56:26.950155', 'step': 16592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:27.007391', 'step': 16592, 'epoch': 3} {'type': 'loss', 'content': 0.07479288429021835, 'timestamp': '2025-09-10 02:56:27.009486', 'step': 16593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:27.064618', 'step': 16593, 'epoch': 3} {'type': 'loss', 'content': 0.07579144835472107, 'timestamp': '2025-09-10 02:56:27.067066', 'step': 16594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:27.122792', 'step': 16594, 'epoch': 3} {'type': 'loss', 'content': 0.13886649906635284, 'timestamp': '2025-09-10 02:56:27.125190', 'step': 16595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:27.180187', 'step': 16595, 'epoch': 3} {'type': 'loss', 'content': 0.0688503235578537, 'timestamp': '2025-09-10 02:56:27.186699', 'step': 16596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:27.241815', 'step': 16596, 'epoch': 3} {'type': 'loss', 'content': 0.056277964264154434, 'timestamp': '2025-09-10 02:56:27.244074', 'step': 16597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:27.298835', 'step': 16597, 'epoch': 3} {'type': 'loss', 'content': 0.09604106843471527, 'timestamp': '2025-09-10 02:56:27.300957', 'step': 16598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:56:27.358478', 'step': 16598, 'epoch': 3} {'type': 'loss', 'content': 0.07684261351823807, 'timestamp': '2025-09-10 02:56:27.360666', 'step': 16599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:27.415771', 'step': 16599, 'epoch': 3} {'type': 'loss', 'content': 0.11792654544115067, 'timestamp': '2025-09-10 02:56:27.421643', 'step': 16600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:27.475843', 'step': 16600, 'epoch': 3} {'type': 'loss', 'content': 0.08205317705869675, 'timestamp': '2025-09-10 02:56:27.477899', 'step': 16601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:27.533470', 'step': 16601, 'epoch': 3} {'type': 'loss', 'content': 0.05660681426525116, 'timestamp': '2025-09-10 02:56:27.535714', 'step': 16602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:56:27.589377', 'step': 16602, 'epoch': 3} {'type': 'loss', 'content': 0.06763528287410736, 'timestamp': '2025-09-10 02:56:27.591767', 'step': 16603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:27.645849', 'step': 16603, 'epoch': 3} {'type': 'loss', 'content': 0.1472041755914688, 'timestamp': '2025-09-10 02:56:27.652056', 'step': 16604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:27.709061', 'step': 16604, 'epoch': 3} {'type': 'loss', 'content': 0.15509845316410065, 'timestamp': '2025-09-10 02:56:27.711042', 'step': 16605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:27.768004', 'step': 16605, 'epoch': 3} {'type': 'loss', 'content': 0.05818716436624527, 'timestamp': '2025-09-10 02:56:27.770128', 'step': 16606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:27.825392', 'step': 16606, 'epoch': 3} {'type': 'loss', 'content': 0.17978784441947937, 'timestamp': '2025-09-10 02:56:27.827672', 'step': 16607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:27.884512', 'step': 16607, 'epoch': 3} {'type': 'loss', 'content': 0.020325517281889915, 'timestamp': '2025-09-10 02:56:27.890756', 'step': 16608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:27.944771', 'step': 16608, 'epoch': 3} {'type': 'loss', 'content': 0.04801035299897194, 'timestamp': '2025-09-10 02:56:27.947176', 'step': 16609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:28.001604', 'step': 16609, 'epoch': 3} {'type': 'loss', 'content': 0.12115003168582916, 'timestamp': '2025-09-10 02:56:28.003993', 'step': 16610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:56:28.069911', 'step': 16610, 'epoch': 3} {'type': 'loss', 'content': 0.1708165556192398, 'timestamp': '2025-09-10 02:56:28.072238', 'step': 16611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:28.128964', 'step': 16611, 'epoch': 3} {'type': 'loss', 'content': 0.05432834476232529, 'timestamp': '2025-09-10 02:56:28.135075', 'step': 16612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:28.190974', 'step': 16612, 'epoch': 3} {'type': 'loss', 'content': 0.15221315622329712, 'timestamp': '2025-09-10 02:56:28.193390', 'step': 16613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:28.248360', 'step': 16613, 'epoch': 3} {'type': 'loss', 'content': 0.08752773702144623, 'timestamp': '2025-09-10 02:56:28.250568', 'step': 16614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:28.304925', 'step': 16614, 'epoch': 3} {'type': 'loss', 'content': 0.07141005247831345, 'timestamp': '2025-09-10 02:56:28.307284', 'step': 16615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:28.365840', 'step': 16615, 'epoch': 3} {'type': 'loss', 'content': 0.06593959778547287, 'timestamp': '2025-09-10 02:56:28.371654', 'step': 16616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:28.429308', 'step': 16616, 'epoch': 3} {'type': 'loss', 'content': 0.10437959432601929, 'timestamp': '2025-09-10 02:56:28.431392', 'step': 16617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:28.486634', 'step': 16617, 'epoch': 3} {'type': 'loss', 'content': 0.08176696300506592, 'timestamp': '2025-09-10 02:56:28.488725', 'step': 16618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:28.545899', 'step': 16618, 'epoch': 3} {'type': 'loss', 'content': 0.0572274886071682, 'timestamp': '2025-09-10 02:56:28.547996', 'step': 16619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:28.605560', 'step': 16619, 'epoch': 3} {'type': 'loss', 'content': 0.18751226365566254, 'timestamp': '2025-09-10 02:56:28.611346', 'step': 16620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:56:28.667706', 'step': 16620, 'epoch': 3} {'type': 'loss', 'content': 0.03893372416496277, 'timestamp': '2025-09-10 02:56:28.669750', 'step': 16621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:28.730000', 'step': 16621, 'epoch': 3} {'type': 'loss', 'content': 0.08391319215297699, 'timestamp': '2025-09-10 02:56:28.732418', 'step': 16622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:28.788623', 'step': 16622, 'epoch': 3} {'type': 'loss', 'content': 0.06639370322227478, 'timestamp': '2025-09-10 02:56:28.790957', 'step': 16623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:28.846538', 'step': 16623, 'epoch': 3} {'type': 'loss', 'content': 0.03773730620741844, 'timestamp': '2025-09-10 02:56:28.852719', 'step': 16624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:28.907220', 'step': 16624, 'epoch': 3} {'type': 'loss', 'content': 0.19230729341506958, 'timestamp': '2025-09-10 02:56:28.909244', 'step': 16625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:28.966878', 'step': 16625, 'epoch': 3} {'type': 'loss', 'content': 0.1674203723669052, 'timestamp': '2025-09-10 02:56:28.969106', 'step': 16626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:29.032321', 'step': 16626, 'epoch': 3} {'type': 'loss', 'content': 0.07211901992559433, 'timestamp': '2025-09-10 02:56:29.034685', 'step': 16627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:29.095137', 'step': 16627, 'epoch': 3} {'type': 'loss', 'content': 0.10062512755393982, 'timestamp': '2025-09-10 02:56:29.101094', 'step': 16628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:29.155345', 'step': 16628, 'epoch': 3} {'type': 'loss', 'content': 0.08328725397586823, 'timestamp': '2025-09-10 02:56:29.157960', 'step': 16629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:29.216273', 'step': 16629, 'epoch': 3} {'type': 'loss', 'content': 0.09863247722387314, 'timestamp': '2025-09-10 02:56:29.220784', 'step': 16630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:56:29.281583', 'step': 16630, 'epoch': 3} {'type': 'loss', 'content': 0.10991974920034409, 'timestamp': '2025-09-10 02:56:29.283696', 'step': 16631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:29.342796', 'step': 16631, 'epoch': 3} {'type': 'loss', 'content': 0.09268389642238617, 'timestamp': '2025-09-10 02:56:29.348642', 'step': 16632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:29.414911', 'step': 16632, 'epoch': 3} {'type': 'loss', 'content': 0.029255958274006844, 'timestamp': '2025-09-10 02:56:29.417105', 'step': 16633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:29.472841', 'step': 16633, 'epoch': 3} {'type': 'loss', 'content': 0.17232656478881836, 'timestamp': '2025-09-10 02:56:29.479118', 'step': 16634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:29.534127', 'step': 16634, 'epoch': 3} {'type': 'loss', 'content': 0.15396535396575928, 'timestamp': '2025-09-10 02:56:29.536252', 'step': 16635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:29.591897', 'step': 16635, 'epoch': 3} {'type': 'loss', 'content': 0.03955390304327011, 'timestamp': '2025-09-10 02:56:29.598022', 'step': 16636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:29.651966', 'step': 16636, 'epoch': 3} {'type': 'loss', 'content': 0.0636795163154602, 'timestamp': '2025-09-10 02:56:29.659917', 'step': 16637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:29.718266', 'step': 16637, 'epoch': 3} {'type': 'loss', 'content': 0.09594446420669556, 'timestamp': '2025-09-10 02:56:29.720650', 'step': 16638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:29.775284', 'step': 16638, 'epoch': 3} {'type': 'loss', 'content': 0.2391359806060791, 'timestamp': '2025-09-10 02:56:29.777603', 'step': 16639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:29.831160', 'step': 16639, 'epoch': 3} {'type': 'loss', 'content': 0.07286833226680756, 'timestamp': '2025-09-10 02:56:29.836626', 'step': 16640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:29.890575', 'step': 16640, 'epoch': 3} {'type': 'loss', 'content': 0.0712757334113121, 'timestamp': '2025-09-10 02:56:29.892800', 'step': 16641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:29.952525', 'step': 16641, 'epoch': 3} {'type': 'loss', 'content': 0.11526305228471756, 'timestamp': '2025-09-10 02:56:29.954725', 'step': 16642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:30.017027', 'step': 16642, 'epoch': 3} {'type': 'loss', 'content': 0.11734268814325333, 'timestamp': '2025-09-10 02:56:30.019331', 'step': 16643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:30.077472', 'step': 16643, 'epoch': 3} {'type': 'loss', 'content': 0.08643513917922974, 'timestamp': '2025-09-10 02:56:30.084980', 'step': 16644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:30.141699', 'step': 16644, 'epoch': 3} {'type': 'loss', 'content': 0.060117799788713455, 'timestamp': '2025-09-10 02:56:30.145463', 'step': 16645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:30.202773', 'step': 16645, 'epoch': 3} {'type': 'loss', 'content': 0.10005093365907669, 'timestamp': '2025-09-10 02:56:30.204728', 'step': 16646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:30.261071', 'step': 16646, 'epoch': 3} {'type': 'loss', 'content': 0.09063410013914108, 'timestamp': '2025-09-10 02:56:30.263063', 'step': 16647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:30.319014', 'step': 16647, 'epoch': 3} {'type': 'loss', 'content': 0.07279197871685028, 'timestamp': '2025-09-10 02:56:30.324600', 'step': 16648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:30.380487', 'step': 16648, 'epoch': 3} {'type': 'loss', 'content': 0.1446215808391571, 'timestamp': '2025-09-10 02:56:30.382382', 'step': 16649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:30.447488', 'step': 16649, 'epoch': 3} {'type': 'loss', 'content': 0.0783204510807991, 'timestamp': '2025-09-10 02:56:30.449767', 'step': 16650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:30.505780', 'step': 16650, 'epoch': 3} {'type': 'loss', 'content': 0.04773549735546112, 'timestamp': '2025-09-10 02:56:30.507993', 'step': 16651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:30.563342', 'step': 16651, 'epoch': 3} {'type': 'loss', 'content': 0.10192891210317612, 'timestamp': '2025-09-10 02:56:30.570272', 'step': 16652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:56:30.625563', 'step': 16652, 'epoch': 3} {'type': 'loss', 'content': 0.15898855030536652, 'timestamp': '2025-09-10 02:56:30.627909', 'step': 16653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:30.682837', 'step': 16653, 'epoch': 3} {'type': 'loss', 'content': 0.10121481120586395, 'timestamp': '2025-09-10 02:56:30.684976', 'step': 16654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:30.751100', 'step': 16654, 'epoch': 3} {'type': 'loss', 'content': 0.10020384937524796, 'timestamp': '2025-09-10 02:56:30.754323', 'step': 16655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:30.814606', 'step': 16655, 'epoch': 3} {'type': 'loss', 'content': 0.10892961174249649, 'timestamp': '2025-09-10 02:56:30.820569', 'step': 16656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:30.874063', 'step': 16656, 'epoch': 3} {'type': 'loss', 'content': 0.039118628948926926, 'timestamp': '2025-09-10 02:56:30.876457', 'step': 16657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:30.935386', 'step': 16657, 'epoch': 3} {'type': 'loss', 'content': 0.12245547771453857, 'timestamp': '2025-09-10 02:56:30.937663', 'step': 16658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:30.993123', 'step': 16658, 'epoch': 3} {'type': 'loss', 'content': 0.12810227274894714, 'timestamp': '2025-09-10 02:56:30.995162', 'step': 16659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:31.050960', 'step': 16659, 'epoch': 3} {'type': 'loss', 'content': 0.15912173688411713, 'timestamp': '2025-09-10 02:56:31.056942', 'step': 16660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:31.110151', 'step': 16660, 'epoch': 3} {'type': 'loss', 'content': 0.0871979221701622, 'timestamp': '2025-09-10 02:56:31.114654', 'step': 16661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:31.170278', 'step': 16661, 'epoch': 3} {'type': 'loss', 'content': 0.05343217775225639, 'timestamp': '2025-09-10 02:56:31.172293', 'step': 16662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:31.226375', 'step': 16662, 'epoch': 3} {'type': 'loss', 'content': 0.08134150505065918, 'timestamp': '2025-09-10 02:56:31.228266', 'step': 16663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:31.282511', 'step': 16663, 'epoch': 3} {'type': 'loss', 'content': 0.041933443397283554, 'timestamp': '2025-09-10 02:56:31.288513', 'step': 16664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:31.341913', 'step': 16664, 'epoch': 3} {'type': 'loss', 'content': 0.13606642186641693, 'timestamp': '2025-09-10 02:56:31.344026', 'step': 16665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:31.399052', 'step': 16665, 'epoch': 3} {'type': 'loss', 'content': 0.10133273899555206, 'timestamp': '2025-09-10 02:56:31.401800', 'step': 16666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:31.458627', 'step': 16666, 'epoch': 3} {'type': 'loss', 'content': 0.06115847826004028, 'timestamp': '2025-09-10 02:56:31.460995', 'step': 16667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:31.517620', 'step': 16667, 'epoch': 3} {'type': 'loss', 'content': 0.07987778633832932, 'timestamp': '2025-09-10 02:56:31.523548', 'step': 16668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:31.580529', 'step': 16668, 'epoch': 3} {'type': 'loss', 'content': 0.05445946380496025, 'timestamp': '2025-09-10 02:56:31.584326', 'step': 16669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:31.641340', 'step': 16669, 'epoch': 3} {'type': 'loss', 'content': 0.10516361147165298, 'timestamp': '2025-09-10 02:56:31.643570', 'step': 16670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:31.698088', 'step': 16670, 'epoch': 3} {'type': 'loss', 'content': 0.11349541693925858, 'timestamp': '2025-09-10 02:56:31.700602', 'step': 16671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:31.765864', 'step': 16671, 'epoch': 3} {'type': 'loss', 'content': 0.10362301766872406, 'timestamp': '2025-09-10 02:56:31.772001', 'step': 16672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:31.827159', 'step': 16672, 'epoch': 3} {'type': 'loss', 'content': 0.11417729407548904, 'timestamp': '2025-09-10 02:56:31.829409', 'step': 16673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:31.887719', 'step': 16673, 'epoch': 3} {'type': 'loss', 'content': 0.08726184815168381, 'timestamp': '2025-09-10 02:56:31.889679', 'step': 16674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:31.946701', 'step': 16674, 'epoch': 3} {'type': 'loss', 'content': 0.04575926437973976, 'timestamp': '2025-09-10 02:56:31.948920', 'step': 16675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:32.004546', 'step': 16675, 'epoch': 3} {'type': 'loss', 'content': 0.05678441375494003, 'timestamp': '2025-09-10 02:56:32.010590', 'step': 16676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:32.066064', 'step': 16676, 'epoch': 3} {'type': 'loss', 'content': 0.07678881287574768, 'timestamp': '2025-09-10 02:56:32.068238', 'step': 16677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:32.124049', 'step': 16677, 'epoch': 3} {'type': 'loss', 'content': 0.06583818793296814, 'timestamp': '2025-09-10 02:56:32.126302', 'step': 16678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:32.182088', 'step': 16678, 'epoch': 3} {'type': 'loss', 'content': 0.08746854960918427, 'timestamp': '2025-09-10 02:56:32.184283', 'step': 16679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:32.238922', 'step': 16679, 'epoch': 3} {'type': 'loss', 'content': 0.08543183654546738, 'timestamp': '2025-09-10 02:56:32.244744', 'step': 16680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:32.299899', 'step': 16680, 'epoch': 3} {'type': 'loss', 'content': 0.09073412418365479, 'timestamp': '2025-09-10 02:56:32.301852', 'step': 16681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:32.356282', 'step': 16681, 'epoch': 3} {'type': 'loss', 'content': 0.10025360435247421, 'timestamp': '2025-09-10 02:56:32.358401', 'step': 16682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:32.412856', 'step': 16682, 'epoch': 3} {'type': 'loss', 'content': 0.10204250365495682, 'timestamp': '2025-09-10 02:56:32.415116', 'step': 16683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:32.471033', 'step': 16683, 'epoch': 3} {'type': 'loss', 'content': 0.14162617921829224, 'timestamp': '2025-09-10 02:56:32.477116', 'step': 16684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:32.532349', 'step': 16684, 'epoch': 3} {'type': 'loss', 'content': 0.07719138264656067, 'timestamp': '2025-09-10 02:56:32.534360', 'step': 16685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:56:32.588530', 'step': 16685, 'epoch': 3} {'type': 'loss', 'content': 0.06876889616250992, 'timestamp': '2025-09-10 02:56:32.590860', 'step': 16686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:32.645480', 'step': 16686, 'epoch': 3} {'type': 'loss', 'content': 0.06301908195018768, 'timestamp': '2025-09-10 02:56:32.647657', 'step': 16687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:32.702459', 'step': 16687, 'epoch': 3} {'type': 'loss', 'content': 0.1450461447238922, 'timestamp': '2025-09-10 02:56:32.708686', 'step': 16688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:32.762572', 'step': 16688, 'epoch': 3} {'type': 'loss', 'content': 0.06963705271482468, 'timestamp': '2025-09-10 02:56:32.765296', 'step': 16689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:32.821481', 'step': 16689, 'epoch': 3} {'type': 'loss', 'content': 0.09315545856952667, 'timestamp': '2025-09-10 02:56:32.823478', 'step': 16690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:32.878190', 'step': 16690, 'epoch': 3} {'type': 'loss', 'content': 0.08381660282611847, 'timestamp': '2025-09-10 02:56:32.880455', 'step': 16691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:32.936133', 'step': 16691, 'epoch': 3} {'type': 'loss', 'content': 0.055792149156332016, 'timestamp': '2025-09-10 02:56:32.942550', 'step': 16692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:33.001321', 'step': 16692, 'epoch': 3} {'type': 'loss', 'content': 0.06577514111995697, 'timestamp': '2025-09-10 02:56:33.003780', 'step': 16693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:33.063984', 'step': 16693, 'epoch': 3} {'type': 'loss', 'content': 0.13881564140319824, 'timestamp': '2025-09-10 02:56:33.065887', 'step': 16694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:33.129353', 'step': 16694, 'epoch': 3} {'type': 'loss', 'content': 0.14581027626991272, 'timestamp': '2025-09-10 02:56:33.131773', 'step': 16695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:33.192929', 'step': 16695, 'epoch': 3} {'type': 'loss', 'content': 0.1445423811674118, 'timestamp': '2025-09-10 02:56:33.200361', 'step': 16696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:33.254221', 'step': 16696, 'epoch': 3} {'type': 'loss', 'content': 0.12194322049617767, 'timestamp': '2025-09-10 02:56:33.256111', 'step': 16697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:33.310969', 'step': 16697, 'epoch': 3} {'type': 'loss', 'content': 0.1436285823583603, 'timestamp': '2025-09-10 02:56:33.312940', 'step': 16698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:33.368512', 'step': 16698, 'epoch': 3} {'type': 'loss', 'content': 0.07873423397541046, 'timestamp': '2025-09-10 02:56:33.370489', 'step': 16699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:33.425420', 'step': 16699, 'epoch': 3} {'type': 'loss', 'content': 0.05203517526388168, 'timestamp': '2025-09-10 02:56:33.431061', 'step': 16700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:33.485069', 'step': 16700, 'epoch': 3} {'type': 'loss', 'content': 0.159525066614151, 'timestamp': '2025-09-10 02:56:33.486981', 'step': 16701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:33.540464', 'step': 16701, 'epoch': 3} {'type': 'loss', 'content': 0.09621058404445648, 'timestamp': '2025-09-10 02:56:33.542617', 'step': 16702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:33.599070', 'step': 16702, 'epoch': 3} {'type': 'loss', 'content': 0.08762297034263611, 'timestamp': '2025-09-10 02:56:33.601368', 'step': 16703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:33.657432', 'step': 16703, 'epoch': 3} {'type': 'loss', 'content': 0.11882254481315613, 'timestamp': '2025-09-10 02:56:33.663682', 'step': 16704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:33.718196', 'step': 16704, 'epoch': 3} {'type': 'loss', 'content': 0.11623811721801758, 'timestamp': '2025-09-10 02:56:33.720849', 'step': 16705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:33.775528', 'step': 16705, 'epoch': 3} {'type': 'loss', 'content': 0.03844509646296501, 'timestamp': '2025-09-10 02:56:33.777836', 'step': 16706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:33.832862', 'step': 16706, 'epoch': 3} {'type': 'loss', 'content': 0.1000659242272377, 'timestamp': '2025-09-10 02:56:33.835127', 'step': 16707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:33.891896', 'step': 16707, 'epoch': 3} {'type': 'loss', 'content': 0.08277301490306854, 'timestamp': '2025-09-10 02:56:33.897895', 'step': 16708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:33.953793', 'step': 16708, 'epoch': 3} {'type': 'loss', 'content': 0.07601901143789291, 'timestamp': '2025-09-10 02:56:33.955924', 'step': 16709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:34.009923', 'step': 16709, 'epoch': 3} {'type': 'loss', 'content': 0.13081571459770203, 'timestamp': '2025-09-10 02:56:34.013424', 'step': 16710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:34.069095', 'step': 16710, 'epoch': 3} {'type': 'loss', 'content': 0.08896009624004364, 'timestamp': '2025-09-10 02:56:34.071300', 'step': 16711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:34.129640', 'step': 16711, 'epoch': 3} {'type': 'loss', 'content': 0.08092651516199112, 'timestamp': '2025-09-10 02:56:34.135801', 'step': 16712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:34.197493', 'step': 16712, 'epoch': 3} {'type': 'loss', 'content': 0.08893438428640366, 'timestamp': '2025-09-10 02:56:34.199856', 'step': 16713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:34.259532', 'step': 16713, 'epoch': 3} {'type': 'loss', 'content': 0.12675581872463226, 'timestamp': '2025-09-10 02:56:34.261710', 'step': 16714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:34.322547', 'step': 16714, 'epoch': 3} {'type': 'loss', 'content': 0.05848444998264313, 'timestamp': '2025-09-10 02:56:34.324788', 'step': 16715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:34.379498', 'step': 16715, 'epoch': 3} {'type': 'loss', 'content': 0.14560890197753906, 'timestamp': '2025-09-10 02:56:34.385580', 'step': 16716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:34.440185', 'step': 16716, 'epoch': 3} {'type': 'loss', 'content': 0.1818990856409073, 'timestamp': '2025-09-10 02:56:34.442377', 'step': 16717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:56:34.499061', 'step': 16717, 'epoch': 3} {'type': 'loss', 'content': 0.1898747831583023, 'timestamp': '2025-09-10 02:56:34.501100', 'step': 16718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:34.555746', 'step': 16718, 'epoch': 3} {'type': 'loss', 'content': 0.11714103072881699, 'timestamp': '2025-09-10 02:56:34.557608', 'step': 16719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:34.610827', 'step': 16719, 'epoch': 3} {'type': 'loss', 'content': 0.08643022179603577, 'timestamp': '2025-09-10 02:56:34.616568', 'step': 16720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:34.670092', 'step': 16720, 'epoch': 3} {'type': 'loss', 'content': 0.06789771467447281, 'timestamp': '2025-09-10 02:56:34.672228', 'step': 16721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:34.727365', 'step': 16721, 'epoch': 3} {'type': 'loss', 'content': 0.05431608855724335, 'timestamp': '2025-09-10 02:56:34.729666', 'step': 16722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:34.785211', 'step': 16722, 'epoch': 3} {'type': 'loss', 'content': 0.0927601233124733, 'timestamp': '2025-09-10 02:56:34.787456', 'step': 16723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:34.842223', 'step': 16723, 'epoch': 3} {'type': 'loss', 'content': 0.1518210619688034, 'timestamp': '2025-09-10 02:56:34.854986', 'step': 16724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:34.910127', 'step': 16724, 'epoch': 3} {'type': 'loss', 'content': 0.14147329330444336, 'timestamp': '2025-09-10 02:56:34.912698', 'step': 16725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:34.967619', 'step': 16725, 'epoch': 3} {'type': 'loss', 'content': 0.15695063769817352, 'timestamp': '2025-09-10 02:56:34.969754', 'step': 16726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:35.023985', 'step': 16726, 'epoch': 3} {'type': 'loss', 'content': 0.10887842625379562, 'timestamp': '2025-09-10 02:56:35.028411', 'step': 16727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:35.088476', 'step': 16727, 'epoch': 3} {'type': 'loss', 'content': 0.08308420330286026, 'timestamp': '2025-09-10 02:56:35.094756', 'step': 16728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:35.148916', 'step': 16728, 'epoch': 3} {'type': 'loss', 'content': 0.05992132052779198, 'timestamp': '2025-09-10 02:56:35.151164', 'step': 16729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:56:35.205478', 'step': 16729, 'epoch': 3} {'type': 'loss', 'content': 0.10369370877742767, 'timestamp': '2025-09-10 02:56:35.207762', 'step': 16730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:35.261808', 'step': 16730, 'epoch': 3} {'type': 'loss', 'content': 0.04902315139770508, 'timestamp': '2025-09-10 02:56:35.264897', 'step': 16731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:35.319676', 'step': 16731, 'epoch': 3} {'type': 'loss', 'content': 0.11129189282655716, 'timestamp': '2025-09-10 02:56:35.325935', 'step': 16732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:35.379517', 'step': 16732, 'epoch': 3} {'type': 'loss', 'content': 0.08551502227783203, 'timestamp': '2025-09-10 02:56:35.383620', 'step': 16733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:56:35.438337', 'step': 16733, 'epoch': 3} {'type': 'loss', 'content': 0.06557489186525345, 'timestamp': '2025-09-10 02:56:35.445474', 'step': 16734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:35.508480', 'step': 16734, 'epoch': 3} {'type': 'loss', 'content': 0.04936571791768074, 'timestamp': '2025-09-10 02:56:35.513211', 'step': 16735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:35.571221', 'step': 16735, 'epoch': 3} {'type': 'loss', 'content': 0.09392812103033066, 'timestamp': '2025-09-10 02:56:35.577408', 'step': 16736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:35.637379', 'step': 16736, 'epoch': 3} {'type': 'loss', 'content': 0.10729753226041794, 'timestamp': '2025-09-10 02:56:35.639628', 'step': 16737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:35.701254', 'step': 16737, 'epoch': 3} {'type': 'loss', 'content': 0.03379174694418907, 'timestamp': '2025-09-10 02:56:35.703602', 'step': 16738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:35.773357', 'step': 16738, 'epoch': 3} {'type': 'loss', 'content': 0.13104267418384552, 'timestamp': '2025-09-10 02:56:35.777701', 'step': 16739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:35.837412', 'step': 16739, 'epoch': 3} {'type': 'loss', 'content': 0.14850468933582306, 'timestamp': '2025-09-10 02:56:35.843611', 'step': 16740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:35.906195', 'step': 16740, 'epoch': 3} {'type': 'loss', 'content': 0.04623567685484886, 'timestamp': '2025-09-10 02:56:35.908302', 'step': 16741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:35.965275', 'step': 16741, 'epoch': 3} {'type': 'loss', 'content': 0.12432622164487839, 'timestamp': '2025-09-10 02:56:35.967104', 'step': 16742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:36.024198', 'step': 16742, 'epoch': 3} {'type': 'loss', 'content': 0.1025199368596077, 'timestamp': '2025-09-10 02:56:36.026408', 'step': 16743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:36.083175', 'step': 16743, 'epoch': 3} {'type': 'loss', 'content': 0.08981825411319733, 'timestamp': '2025-09-10 02:56:36.089447', 'step': 16744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:36.147224', 'step': 16744, 'epoch': 3} {'type': 'loss', 'content': 0.15156517922878265, 'timestamp': '2025-09-10 02:56:36.149457', 'step': 16745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:36.205638', 'step': 16745, 'epoch': 3} {'type': 'loss', 'content': 0.07835663855075836, 'timestamp': '2025-09-10 02:56:36.207823', 'step': 16746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:36.265023', 'step': 16746, 'epoch': 3} {'type': 'loss', 'content': 0.1310851126909256, 'timestamp': '2025-09-10 02:56:36.267259', 'step': 16747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:36.332938', 'step': 16747, 'epoch': 3} {'type': 'loss', 'content': 0.0491475909948349, 'timestamp': '2025-09-10 02:56:36.339416', 'step': 16748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:36.398297', 'step': 16748, 'epoch': 3} {'type': 'loss', 'content': 0.07854718714952469, 'timestamp': '2025-09-10 02:56:36.400490', 'step': 16749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:36.454983', 'step': 16749, 'epoch': 3} {'type': 'loss', 'content': 0.0407528430223465, 'timestamp': '2025-09-10 02:56:36.457134', 'step': 16750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:36.511905', 'step': 16750, 'epoch': 3} {'type': 'loss', 'content': 0.12731021642684937, 'timestamp': '2025-09-10 02:56:36.514148', 'step': 16751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:36.570501', 'step': 16751, 'epoch': 3} {'type': 'loss', 'content': 0.12527067959308624, 'timestamp': '2025-09-10 02:56:36.577225', 'step': 16752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:36.632036', 'step': 16752, 'epoch': 3} {'type': 'loss', 'content': 0.13924114406108856, 'timestamp': '2025-09-10 02:56:36.634121', 'step': 16753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:36.689551', 'step': 16753, 'epoch': 3} {'type': 'loss', 'content': 0.0874917209148407, 'timestamp': '2025-09-10 02:56:36.691764', 'step': 16754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:36.747158', 'step': 16754, 'epoch': 3} {'type': 'loss', 'content': 0.061974894255399704, 'timestamp': '2025-09-10 02:56:36.749414', 'step': 16755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:36.805067', 'step': 16755, 'epoch': 3} {'type': 'loss', 'content': 0.14230512082576752, 'timestamp': '2025-09-10 02:56:36.811288', 'step': 16756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:36.865405', 'step': 16756, 'epoch': 3} {'type': 'loss', 'content': 0.03791528567671776, 'timestamp': '2025-09-10 02:56:36.867599', 'step': 16757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:56:36.922329', 'step': 16757, 'epoch': 3} {'type': 'loss', 'content': 0.15836559236049652, 'timestamp': '2025-09-10 02:56:36.924528', 'step': 16758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:36.980262', 'step': 16758, 'epoch': 3} {'type': 'loss', 'content': 0.10470936447381973, 'timestamp': '2025-09-10 02:56:36.982578', 'step': 16759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:37.039259', 'step': 16759, 'epoch': 3} {'type': 'loss', 'content': 0.18524500727653503, 'timestamp': '2025-09-10 02:56:37.045960', 'step': 16760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:37.101817', 'step': 16760, 'epoch': 3} {'type': 'loss', 'content': 0.0588519349694252, 'timestamp': '2025-09-10 02:56:37.103999', 'step': 16761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:37.161028', 'step': 16761, 'epoch': 3} {'type': 'loss', 'content': 0.07004023343324661, 'timestamp': '2025-09-10 02:56:37.163315', 'step': 16762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:37.220109', 'step': 16762, 'epoch': 3} {'type': 'loss', 'content': 0.056596748530864716, 'timestamp': '2025-09-10 02:56:37.222358', 'step': 16763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:37.280618', 'step': 16763, 'epoch': 3} {'type': 'loss', 'content': 0.04506053775548935, 'timestamp': '2025-09-10 02:56:37.287370', 'step': 16764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:37.344051', 'step': 16764, 'epoch': 3} {'type': 'loss', 'content': 0.08852972090244293, 'timestamp': '2025-09-10 02:56:37.346369', 'step': 16765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:37.402396', 'step': 16765, 'epoch': 3} {'type': 'loss', 'content': 0.09363928437232971, 'timestamp': '2025-09-10 02:56:37.404587', 'step': 16766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:37.459668', 'step': 16766, 'epoch': 3} {'type': 'loss', 'content': 0.10330726951360703, 'timestamp': '2025-09-10 02:56:37.461957', 'step': 16767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:37.516276', 'step': 16767, 'epoch': 3} {'type': 'loss', 'content': 0.11256907135248184, 'timestamp': '2025-09-10 02:56:37.522793', 'step': 16768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:37.576394', 'step': 16768, 'epoch': 3} {'type': 'loss', 'content': 0.10269208252429962, 'timestamp': '2025-09-10 02:56:37.578570', 'step': 16769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:37.633102', 'step': 16769, 'epoch': 3} {'type': 'loss', 'content': 0.07860930263996124, 'timestamp': '2025-09-10 02:56:37.635201', 'step': 16770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:37.689142', 'step': 16770, 'epoch': 3} {'type': 'loss', 'content': 0.061474062502384186, 'timestamp': '2025-09-10 02:56:37.691340', 'step': 16771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:37.744894', 'step': 16771, 'epoch': 3} {'type': 'loss', 'content': 0.1519489884376526, 'timestamp': '2025-09-10 02:56:37.751117', 'step': 16772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:37.805026', 'step': 16772, 'epoch': 3} {'type': 'loss', 'content': 0.13207323849201202, 'timestamp': '2025-09-10 02:56:37.807131', 'step': 16773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:37.863449', 'step': 16773, 'epoch': 3} {'type': 'loss', 'content': 0.07534720003604889, 'timestamp': '2025-09-10 02:56:37.865696', 'step': 16774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:37.919738', 'step': 16774, 'epoch': 3} {'type': 'loss', 'content': 0.05674339085817337, 'timestamp': '2025-09-10 02:56:37.922018', 'step': 16775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:37.975203', 'step': 16775, 'epoch': 3} {'type': 'loss', 'content': 0.07658073306083679, 'timestamp': '2025-09-10 02:56:37.981344', 'step': 16776, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:56:51.897721', 'step': 16776, 'epoch': 3} {'type': 'pplx', 'content': 12817.249925283142, 'timestamp': '2025-09-10 02:56:51.900860', 'step': 16776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:51.959835', 'step': 16776, 'epoch': 3} {'type': 'loss', 'content': 0.09103942662477493, 'timestamp': '2025-09-10 02:56:51.962268', 'step': 16777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:52.020697', 'step': 16777, 'epoch': 3} {'type': 'loss', 'content': 0.04518356919288635, 'timestamp': '2025-09-10 02:56:52.022854', 'step': 16778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:52.079246', 'step': 16778, 'epoch': 3} {'type': 'loss', 'content': 0.05619126185774803, 'timestamp': '2025-09-10 02:56:52.081119', 'step': 16779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:52.138559', 'step': 16779, 'epoch': 3} {'type': 'loss', 'content': 0.08399378508329391, 'timestamp': '2025-09-10 02:56:52.144676', 'step': 16780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:52.200085', 'step': 16780, 'epoch': 3} {'type': 'loss', 'content': 0.08702349662780762, 'timestamp': '2025-09-10 02:56:52.202293', 'step': 16781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:52.263533', 'step': 16781, 'epoch': 3} {'type': 'loss', 'content': 0.11439186334609985, 'timestamp': '2025-09-10 02:56:52.265992', 'step': 16782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:52.325202', 'step': 16782, 'epoch': 3} {'type': 'loss', 'content': 0.09120678156614304, 'timestamp': '2025-09-10 02:56:52.327394', 'step': 16783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:52.385974', 'step': 16783, 'epoch': 3} {'type': 'loss', 'content': 0.1524364948272705, 'timestamp': '2025-09-10 02:56:52.393669', 'step': 16784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:52.449939', 'step': 16784, 'epoch': 3} {'type': 'loss', 'content': 0.10098905116319656, 'timestamp': '2025-09-10 02:56:52.452114', 'step': 16785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:52.508033', 'step': 16785, 'epoch': 3} {'type': 'loss', 'content': 0.12186834216117859, 'timestamp': '2025-09-10 02:56:52.510183', 'step': 16786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:52.565383', 'step': 16786, 'epoch': 3} {'type': 'loss', 'content': 0.07880789041519165, 'timestamp': '2025-09-10 02:56:52.567628', 'step': 16787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:52.623286', 'step': 16787, 'epoch': 3} {'type': 'loss', 'content': 0.16651472449302673, 'timestamp': '2025-09-10 02:56:52.629578', 'step': 16788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:52.683785', 'step': 16788, 'epoch': 3} {'type': 'loss', 'content': 0.07810945063829422, 'timestamp': '2025-09-10 02:56:52.685872', 'step': 16789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:52.740796', 'step': 16789, 'epoch': 3} {'type': 'loss', 'content': 0.06473793834447861, 'timestamp': '2025-09-10 02:56:52.743208', 'step': 16790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:52.797704', 'step': 16790, 'epoch': 3} {'type': 'loss', 'content': 0.11033406853675842, 'timestamp': '2025-09-10 02:56:52.800018', 'step': 16791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:52.855694', 'step': 16791, 'epoch': 3} {'type': 'loss', 'content': 0.0874691903591156, 'timestamp': '2025-09-10 02:56:52.862218', 'step': 16792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:52.916692', 'step': 16792, 'epoch': 3} {'type': 'loss', 'content': 0.1308499574661255, 'timestamp': '2025-09-10 02:56:52.918929', 'step': 16793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:52.974150', 'step': 16793, 'epoch': 3} {'type': 'loss', 'content': 0.033401597291231155, 'timestamp': '2025-09-10 02:56:52.976315', 'step': 16794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:53.032281', 'step': 16794, 'epoch': 3} {'type': 'loss', 'content': 0.06841474026441574, 'timestamp': '2025-09-10 02:56:53.034452', 'step': 16795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:53.089359', 'step': 16795, 'epoch': 3} {'type': 'loss', 'content': 0.07503042370080948, 'timestamp': '2025-09-10 02:56:53.095710', 'step': 16796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:53.151257', 'step': 16796, 'epoch': 3} {'type': 'loss', 'content': 0.10468617081642151, 'timestamp': '2025-09-10 02:56:53.153465', 'step': 16797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:53.207927', 'step': 16797, 'epoch': 3} {'type': 'loss', 'content': 0.07205890864133835, 'timestamp': '2025-09-10 02:56:53.210130', 'step': 16798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:53.264698', 'step': 16798, 'epoch': 3} {'type': 'loss', 'content': 0.06164553761482239, 'timestamp': '2025-09-10 02:56:53.266434', 'step': 16799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:53.319977', 'step': 16799, 'epoch': 3} {'type': 'loss', 'content': 0.07581143826246262, 'timestamp': '2025-09-10 02:56:53.326103', 'step': 16800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:53.384682', 'step': 16800, 'epoch': 3} {'type': 'loss', 'content': 0.10569529980421066, 'timestamp': '2025-09-10 02:56:53.386842', 'step': 16801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:53.442907', 'step': 16801, 'epoch': 3} {'type': 'loss', 'content': 0.056854382157325745, 'timestamp': '2025-09-10 02:56:53.445070', 'step': 16802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:53.501182', 'step': 16802, 'epoch': 3} {'type': 'loss', 'content': 0.13145820796489716, 'timestamp': '2025-09-10 02:56:53.503401', 'step': 16803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:53.561963', 'step': 16803, 'epoch': 3} {'type': 'loss', 'content': 0.09748771041631699, 'timestamp': '2025-09-10 02:56:53.568806', 'step': 16804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:53.625880', 'step': 16804, 'epoch': 3} {'type': 'loss', 'content': 0.06954197585582733, 'timestamp': '2025-09-10 02:56:53.628246', 'step': 16805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:53.686676', 'step': 16805, 'epoch': 3} {'type': 'loss', 'content': 0.09446936845779419, 'timestamp': '2025-09-10 02:56:53.688839', 'step': 16806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:53.744746', 'step': 16806, 'epoch': 3} {'type': 'loss', 'content': 0.11714927107095718, 'timestamp': '2025-09-10 02:56:53.746947', 'step': 16807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:53.801748', 'step': 16807, 'epoch': 3} {'type': 'loss', 'content': 0.09514129161834717, 'timestamp': '2025-09-10 02:56:53.808048', 'step': 16808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:53.864016', 'step': 16808, 'epoch': 3} {'type': 'loss', 'content': 0.0654035285115242, 'timestamp': '2025-09-10 02:56:53.866512', 'step': 16809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:53.925222', 'step': 16809, 'epoch': 3} {'type': 'loss', 'content': 0.12983645498752594, 'timestamp': '2025-09-10 02:56:53.927415', 'step': 16810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:53.982984', 'step': 16810, 'epoch': 3} {'type': 'loss', 'content': 0.07725365459918976, 'timestamp': '2025-09-10 02:56:53.985103', 'step': 16811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:54.040722', 'step': 16811, 'epoch': 3} {'type': 'loss', 'content': 0.06402766704559326, 'timestamp': '2025-09-10 02:56:54.047019', 'step': 16812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:54.100668', 'step': 16812, 'epoch': 3} {'type': 'loss', 'content': 0.05050205439329147, 'timestamp': '2025-09-10 02:56:54.102795', 'step': 16813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:54.157068', 'step': 16813, 'epoch': 3} {'type': 'loss', 'content': 0.073936827480793, 'timestamp': '2025-09-10 02:56:54.159355', 'step': 16814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:54.214875', 'step': 16814, 'epoch': 3} {'type': 'loss', 'content': 0.09354016929864883, 'timestamp': '2025-09-10 02:56:54.218453', 'step': 16815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:54.274362', 'step': 16815, 'epoch': 3} {'type': 'loss', 'content': 0.10721758008003235, 'timestamp': '2025-09-10 02:56:54.280434', 'step': 16816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:54.335647', 'step': 16816, 'epoch': 3} {'type': 'loss', 'content': 0.04315916448831558, 'timestamp': '2025-09-10 02:56:54.337722', 'step': 16817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:54.394213', 'step': 16817, 'epoch': 3} {'type': 'loss', 'content': 0.04917770251631737, 'timestamp': '2025-09-10 02:56:54.396579', 'step': 16818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:54.454302', 'step': 16818, 'epoch': 3} {'type': 'loss', 'content': 0.07836352288722992, 'timestamp': '2025-09-10 02:56:54.456674', 'step': 16819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:54.511885', 'step': 16819, 'epoch': 3} {'type': 'loss', 'content': 0.08848491311073303, 'timestamp': '2025-09-10 02:56:54.518485', 'step': 16820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:54.573239', 'step': 16820, 'epoch': 3} {'type': 'loss', 'content': 0.1445309966802597, 'timestamp': '2025-09-10 02:56:54.575393', 'step': 16821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:54.629442', 'step': 16821, 'epoch': 3} {'type': 'loss', 'content': 0.1298782229423523, 'timestamp': '2025-09-10 02:56:54.631573', 'step': 16822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:54.687361', 'step': 16822, 'epoch': 3} {'type': 'loss', 'content': 0.18011493980884552, 'timestamp': '2025-09-10 02:56:54.689579', 'step': 16823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:54.743131', 'step': 16823, 'epoch': 3} {'type': 'loss', 'content': 0.17879144847393036, 'timestamp': '2025-09-10 02:56:54.749136', 'step': 16824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:54.803114', 'step': 16824, 'epoch': 3} {'type': 'loss', 'content': 0.05643988400697708, 'timestamp': '2025-09-10 02:56:54.805292', 'step': 16825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:54.860651', 'step': 16825, 'epoch': 3} {'type': 'loss', 'content': 0.15468931198120117, 'timestamp': '2025-09-10 02:56:54.862795', 'step': 16826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:54.916693', 'step': 16826, 'epoch': 3} {'type': 'loss', 'content': 0.121181920170784, 'timestamp': '2025-09-10 02:56:54.918956', 'step': 16827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:54.973523', 'step': 16827, 'epoch': 3} {'type': 'loss', 'content': 0.08555512130260468, 'timestamp': '2025-09-10 02:56:54.981034', 'step': 16828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:55.035212', 'step': 16828, 'epoch': 3} {'type': 'loss', 'content': 0.12996363639831543, 'timestamp': '2025-09-10 02:56:55.037302', 'step': 16829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:55.091524', 'step': 16829, 'epoch': 3} {'type': 'loss', 'content': 0.09731824696063995, 'timestamp': '2025-09-10 02:56:55.093675', 'step': 16830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:55.148301', 'step': 16830, 'epoch': 3} {'type': 'loss', 'content': 0.07137379795312881, 'timestamp': '2025-09-10 02:56:55.150565', 'step': 16831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:55.205372', 'step': 16831, 'epoch': 3} {'type': 'loss', 'content': 0.10480029135942459, 'timestamp': '2025-09-10 02:56:55.211296', 'step': 16832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:55.265910', 'step': 16832, 'epoch': 3} {'type': 'loss', 'content': 0.06498944014310837, 'timestamp': '2025-09-10 02:56:55.268144', 'step': 16833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:55.322064', 'step': 16833, 'epoch': 3} {'type': 'loss', 'content': 0.0905393436551094, 'timestamp': '2025-09-10 02:56:55.324464', 'step': 16834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:56:55.378972', 'step': 16834, 'epoch': 3} {'type': 'loss', 'content': 0.09304086118936539, 'timestamp': '2025-09-10 02:56:55.382439', 'step': 16835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:55.438271', 'step': 16835, 'epoch': 3} {'type': 'loss', 'content': 0.11261991411447525, 'timestamp': '2025-09-10 02:56:55.444169', 'step': 16836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:55.498394', 'step': 16836, 'epoch': 3} {'type': 'loss', 'content': 0.09319782257080078, 'timestamp': '2025-09-10 02:56:55.500647', 'step': 16837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:55.555982', 'step': 16837, 'epoch': 3} {'type': 'loss', 'content': 0.13213121891021729, 'timestamp': '2025-09-10 02:56:55.557867', 'step': 16838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:55.613176', 'step': 16838, 'epoch': 3} {'type': 'loss', 'content': 0.08520018309354782, 'timestamp': '2025-09-10 02:56:55.615323', 'step': 16839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:55.670116', 'step': 16839, 'epoch': 3} {'type': 'loss', 'content': 0.0668688639998436, 'timestamp': '2025-09-10 02:56:55.675988', 'step': 16840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:55.730261', 'step': 16840, 'epoch': 3} {'type': 'loss', 'content': 0.1472722291946411, 'timestamp': '2025-09-10 02:56:55.732169', 'step': 16841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:55.786828', 'step': 16841, 'epoch': 3} {'type': 'loss', 'content': 0.09087371826171875, 'timestamp': '2025-09-10 02:56:55.788992', 'step': 16842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:55.843804', 'step': 16842, 'epoch': 3} {'type': 'loss', 'content': 0.05344771966338158, 'timestamp': '2025-09-10 02:56:55.846123', 'step': 16843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:55.900350', 'step': 16843, 'epoch': 3} {'type': 'loss', 'content': 0.15491779148578644, 'timestamp': '2025-09-10 02:56:55.906317', 'step': 16844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:55.962396', 'step': 16844, 'epoch': 3} {'type': 'loss', 'content': 0.13366003334522247, 'timestamp': '2025-09-10 02:56:55.964547', 'step': 16845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:56.023055', 'step': 16845, 'epoch': 3} {'type': 'loss', 'content': 0.1629280149936676, 'timestamp': '2025-09-10 02:56:56.025309', 'step': 16846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:56.081909', 'step': 16846, 'epoch': 3} {'type': 'loss', 'content': 0.059012651443481445, 'timestamp': '2025-09-10 02:56:56.084181', 'step': 16847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:56.143815', 'step': 16847, 'epoch': 3} {'type': 'loss', 'content': 0.09427869319915771, 'timestamp': '2025-09-10 02:56:56.150193', 'step': 16848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:56.204644', 'step': 16848, 'epoch': 3} {'type': 'loss', 'content': 0.06960038840770721, 'timestamp': '2025-09-10 02:56:56.206806', 'step': 16849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:56.260904', 'step': 16849, 'epoch': 3} {'type': 'loss', 'content': 0.0943991094827652, 'timestamp': '2025-09-10 02:56:56.263168', 'step': 16850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:56:56.317201', 'step': 16850, 'epoch': 3} {'type': 'loss', 'content': 0.07504647970199585, 'timestamp': '2025-09-10 02:56:56.319474', 'step': 16851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:56.375202', 'step': 16851, 'epoch': 3} {'type': 'loss', 'content': 0.1141798347234726, 'timestamp': '2025-09-10 02:56:56.381326', 'step': 16852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:56.436236', 'step': 16852, 'epoch': 3} {'type': 'loss', 'content': 0.14924703538417816, 'timestamp': '2025-09-10 02:56:56.438566', 'step': 16853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:56.492851', 'step': 16853, 'epoch': 3} {'type': 'loss', 'content': 0.12054827809333801, 'timestamp': '2025-09-10 02:56:56.494862', 'step': 16854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:56.549691', 'step': 16854, 'epoch': 3} {'type': 'loss', 'content': 0.08477597683668137, 'timestamp': '2025-09-10 02:56:56.551909', 'step': 16855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:56.607438', 'step': 16855, 'epoch': 3} {'type': 'loss', 'content': 0.15782983601093292, 'timestamp': '2025-09-10 02:56:56.613565', 'step': 16856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:56.667724', 'step': 16856, 'epoch': 3} {'type': 'loss', 'content': 0.07571373879909515, 'timestamp': '2025-09-10 02:56:56.669637', 'step': 16857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:56.723314', 'step': 16857, 'epoch': 3} {'type': 'loss', 'content': 0.13382016122341156, 'timestamp': '2025-09-10 02:56:56.725653', 'step': 16858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:56.780050', 'step': 16858, 'epoch': 3} {'type': 'loss', 'content': 0.17032508552074432, 'timestamp': '2025-09-10 02:56:56.782407', 'step': 16859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:56.837467', 'step': 16859, 'epoch': 3} {'type': 'loss', 'content': 0.11362987011671066, 'timestamp': '2025-09-10 02:56:56.843630', 'step': 16860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:56.899074', 'step': 16860, 'epoch': 3} {'type': 'loss', 'content': 0.1282287836074829, 'timestamp': '2025-09-10 02:56:56.901501', 'step': 16861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:56.955765', 'step': 16861, 'epoch': 3} {'type': 'loss', 'content': 0.0892493724822998, 'timestamp': '2025-09-10 02:56:56.957999', 'step': 16862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:57.012886', 'step': 16862, 'epoch': 3} {'type': 'loss', 'content': 0.07826545089483261, 'timestamp': '2025-09-10 02:56:57.015635', 'step': 16863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:57.070375', 'step': 16863, 'epoch': 3} {'type': 'loss', 'content': 0.16451714932918549, 'timestamp': '2025-09-10 02:56:57.076268', 'step': 16864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:57.131195', 'step': 16864, 'epoch': 3} {'type': 'loss', 'content': 0.21665801107883453, 'timestamp': '2025-09-10 02:56:57.133228', 'step': 16865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:57.187661', 'step': 16865, 'epoch': 3} {'type': 'loss', 'content': 0.11390364170074463, 'timestamp': '2025-09-10 02:56:57.189823', 'step': 16866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:57.243810', 'step': 16866, 'epoch': 3} {'type': 'loss', 'content': 0.11105197668075562, 'timestamp': '2025-09-10 02:56:57.247402', 'step': 16867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:57.305702', 'step': 16867, 'epoch': 3} {'type': 'loss', 'content': 0.11551524698734283, 'timestamp': '2025-09-10 02:56:57.312032', 'step': 16868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:57.367354', 'step': 16868, 'epoch': 3} {'type': 'loss', 'content': 0.23185910284519196, 'timestamp': '2025-09-10 02:56:57.369517', 'step': 16869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:57.423797', 'step': 16869, 'epoch': 3} {'type': 'loss', 'content': 0.06612454354763031, 'timestamp': '2025-09-10 02:56:57.426003', 'step': 16870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:57.480604', 'step': 16870, 'epoch': 3} {'type': 'loss', 'content': 0.1647474318742752, 'timestamp': '2025-09-10 02:56:57.482733', 'step': 16871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:57.538408', 'step': 16871, 'epoch': 3} {'type': 'loss', 'content': 0.06470129638910294, 'timestamp': '2025-09-10 02:56:57.544660', 'step': 16872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:57.600180', 'step': 16872, 'epoch': 3} {'type': 'loss', 'content': 0.06950365751981735, 'timestamp': '2025-09-10 02:56:57.602043', 'step': 16873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:57.656459', 'step': 16873, 'epoch': 3} {'type': 'loss', 'content': 0.05763251706957817, 'timestamp': '2025-09-10 02:56:57.658768', 'step': 16874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:57.714720', 'step': 16874, 'epoch': 3} {'type': 'loss', 'content': 0.07937774062156677, 'timestamp': '2025-09-10 02:56:57.716839', 'step': 16875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:57.771602', 'step': 16875, 'epoch': 3} {'type': 'loss', 'content': 0.08339476585388184, 'timestamp': '2025-09-10 02:56:57.778001', 'step': 16876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:57.839499', 'step': 16876, 'epoch': 3} {'type': 'loss', 'content': 0.09604640305042267, 'timestamp': '2025-09-10 02:56:57.841739', 'step': 16877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:57.896527', 'step': 16877, 'epoch': 3} {'type': 'loss', 'content': 0.08184997737407684, 'timestamp': '2025-09-10 02:56:57.898535', 'step': 16878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:57.953881', 'step': 16878, 'epoch': 3} {'type': 'loss', 'content': 0.13334974646568298, 'timestamp': '2025-09-10 02:56:57.955825', 'step': 16879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:58.009813', 'step': 16879, 'epoch': 3} {'type': 'loss', 'content': 0.1020321398973465, 'timestamp': '2025-09-10 02:56:58.015970', 'step': 16880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:58.071393', 'step': 16880, 'epoch': 3} {'type': 'loss', 'content': 0.06530427932739258, 'timestamp': '2025-09-10 02:56:58.073659', 'step': 16881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:58.128235', 'step': 16881, 'epoch': 3} {'type': 'loss', 'content': 0.056224822998046875, 'timestamp': '2025-09-10 02:56:58.130392', 'step': 16882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:58.185996', 'step': 16882, 'epoch': 3} {'type': 'loss', 'content': 0.056292787194252014, 'timestamp': '2025-09-10 02:56:58.188002', 'step': 16883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:58.242014', 'step': 16883, 'epoch': 3} {'type': 'loss', 'content': 0.05356672406196594, 'timestamp': '2025-09-10 02:56:58.248198', 'step': 16884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:58.302242', 'step': 16884, 'epoch': 3} {'type': 'loss', 'content': 0.046445850282907486, 'timestamp': '2025-09-10 02:56:58.304432', 'step': 16885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:58.358689', 'step': 16885, 'epoch': 3} {'type': 'loss', 'content': 0.11237774789333344, 'timestamp': '2025-09-10 02:56:58.360903', 'step': 16886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:58.415479', 'step': 16886, 'epoch': 3} {'type': 'loss', 'content': 0.2076135277748108, 'timestamp': '2025-09-10 02:56:58.417658', 'step': 16887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:58.471745', 'step': 16887, 'epoch': 3} {'type': 'loss', 'content': 0.12806054949760437, 'timestamp': '2025-09-10 02:56:58.477908', 'step': 16888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:58.532392', 'step': 16888, 'epoch': 3} {'type': 'loss', 'content': 0.08432858437299728, 'timestamp': '2025-09-10 02:56:58.534300', 'step': 16889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:58.588455', 'step': 16889, 'epoch': 3} {'type': 'loss', 'content': 0.09744268655776978, 'timestamp': '2025-09-10 02:56:58.590568', 'step': 16890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:58.645031', 'step': 16890, 'epoch': 3} {'type': 'loss', 'content': 0.07290112227201462, 'timestamp': '2025-09-10 02:56:58.647408', 'step': 16891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:58.703681', 'step': 16891, 'epoch': 3} {'type': 'loss', 'content': 0.07490435987710953, 'timestamp': '2025-09-10 02:56:58.709915', 'step': 16892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:58.764400', 'step': 16892, 'epoch': 3} {'type': 'loss', 'content': 0.08909354358911514, 'timestamp': '2025-09-10 02:56:58.766117', 'step': 16893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:58.820002', 'step': 16893, 'epoch': 3} {'type': 'loss', 'content': 0.0791868343949318, 'timestamp': '2025-09-10 02:56:58.821821', 'step': 16894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:58.876222', 'step': 16894, 'epoch': 3} {'type': 'loss', 'content': 0.06599151343107224, 'timestamp': '2025-09-10 02:56:58.878048', 'step': 16895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:56:58.933199', 'step': 16895, 'epoch': 3} {'type': 'loss', 'content': 0.06268762052059174, 'timestamp': '2025-09-10 02:56:58.938876', 'step': 16896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:58.993060', 'step': 16896, 'epoch': 3} {'type': 'loss', 'content': 0.06078691408038139, 'timestamp': '2025-09-10 02:56:58.995233', 'step': 16897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:59.051946', 'step': 16897, 'epoch': 3} {'type': 'loss', 'content': 0.07639940828084946, 'timestamp': '2025-09-10 02:56:59.054505', 'step': 16898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:59.109774', 'step': 16898, 'epoch': 3} {'type': 'loss', 'content': 0.08639157563447952, 'timestamp': '2025-09-10 02:56:59.111980', 'step': 16899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:59.166712', 'step': 16899, 'epoch': 3} {'type': 'loss', 'content': 0.09373775124549866, 'timestamp': '2025-09-10 02:56:59.172577', 'step': 16900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:59.226580', 'step': 16900, 'epoch': 3} {'type': 'loss', 'content': 0.08694003522396088, 'timestamp': '2025-09-10 02:56:59.228774', 'step': 16901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:56:59.283929', 'step': 16901, 'epoch': 3} {'type': 'loss', 'content': 0.16264484822750092, 'timestamp': '2025-09-10 02:56:59.285777', 'step': 16902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:59.341029', 'step': 16902, 'epoch': 3} {'type': 'loss', 'content': 0.06245625764131546, 'timestamp': '2025-09-10 02:56:59.342730', 'step': 16903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:59.398199', 'step': 16903, 'epoch': 3} {'type': 'loss', 'content': 0.07927470654249191, 'timestamp': '2025-09-10 02:56:59.404023', 'step': 16904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:59.459396', 'step': 16904, 'epoch': 3} {'type': 'loss', 'content': 0.062308765947818756, 'timestamp': '2025-09-10 02:56:59.461311', 'step': 16905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:59.516775', 'step': 16905, 'epoch': 3} {'type': 'loss', 'content': 0.05731987953186035, 'timestamp': '2025-09-10 02:56:59.518629', 'step': 16906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:59.573904', 'step': 16906, 'epoch': 3} {'type': 'loss', 'content': 0.05438007041811943, 'timestamp': '2025-09-10 02:56:59.576094', 'step': 16907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:59.630911', 'step': 16907, 'epoch': 3} {'type': 'loss', 'content': 0.06587515771389008, 'timestamp': '2025-09-10 02:56:59.637022', 'step': 16908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:56:59.691365', 'step': 16908, 'epoch': 3} {'type': 'loss', 'content': 0.1256617158651352, 'timestamp': '2025-09-10 02:56:59.693595', 'step': 16909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:59.748084', 'step': 16909, 'epoch': 3} {'type': 'loss', 'content': 0.06412528455257416, 'timestamp': '2025-09-10 02:56:59.750167', 'step': 16910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:56:59.806317', 'step': 16910, 'epoch': 3} {'type': 'loss', 'content': 0.09964725375175476, 'timestamp': '2025-09-10 02:56:59.808000', 'step': 16911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:59.863192', 'step': 16911, 'epoch': 3} {'type': 'loss', 'content': 0.050128158181905746, 'timestamp': '2025-09-10 02:56:59.868997', 'step': 16912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:56:59.923527', 'step': 16912, 'epoch': 3} {'type': 'loss', 'content': 0.009970484301447868, 'timestamp': '2025-09-10 02:56:59.925215', 'step': 16913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:56:59.983775', 'step': 16913, 'epoch': 3} {'type': 'loss', 'content': 0.09104055911302567, 'timestamp': '2025-09-10 02:56:59.985731', 'step': 16914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:00.041844', 'step': 16914, 'epoch': 3} {'type': 'loss', 'content': 0.13628748059272766, 'timestamp': '2025-09-10 02:57:00.043998', 'step': 16915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:00.098828', 'step': 16915, 'epoch': 3} {'type': 'loss', 'content': 0.1076134666800499, 'timestamp': '2025-09-10 02:57:00.105132', 'step': 16916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:00.160684', 'step': 16916, 'epoch': 3} {'type': 'loss', 'content': 0.13079966604709625, 'timestamp': '2025-09-10 02:57:00.162637', 'step': 16917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:00.217168', 'step': 16917, 'epoch': 3} {'type': 'loss', 'content': 0.08389168232679367, 'timestamp': '2025-09-10 02:57:00.219235', 'step': 16918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:00.274730', 'step': 16918, 'epoch': 3} {'type': 'loss', 'content': 0.06619199365377426, 'timestamp': '2025-09-10 02:57:00.276514', 'step': 16919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:00.332249', 'step': 16919, 'epoch': 3} {'type': 'loss', 'content': 0.07420328259468079, 'timestamp': '2025-09-10 02:57:00.338449', 'step': 16920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:00.394171', 'step': 16920, 'epoch': 3} {'type': 'loss', 'content': 0.029308771714568138, 'timestamp': '2025-09-10 02:57:00.396397', 'step': 16921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:00.451311', 'step': 16921, 'epoch': 3} {'type': 'loss', 'content': 0.1303711086511612, 'timestamp': '2025-09-10 02:57:00.453563', 'step': 16922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:00.508860', 'step': 16922, 'epoch': 3} {'type': 'loss', 'content': 0.10617423802614212, 'timestamp': '2025-09-10 02:57:00.510845', 'step': 16923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:00.565795', 'step': 16923, 'epoch': 3} {'type': 'loss', 'content': 0.14492371678352356, 'timestamp': '2025-09-10 02:57:00.571837', 'step': 16924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:00.626414', 'step': 16924, 'epoch': 3} {'type': 'loss', 'content': 0.03199927508831024, 'timestamp': '2025-09-10 02:57:00.628370', 'step': 16925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:00.683672', 'step': 16925, 'epoch': 3} {'type': 'loss', 'content': 0.05466143414378166, 'timestamp': '2025-09-10 02:57:00.685483', 'step': 16926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:00.740189', 'step': 16926, 'epoch': 3} {'type': 'loss', 'content': 0.1299685537815094, 'timestamp': '2025-09-10 02:57:00.741877', 'step': 16927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:00.797067', 'step': 16927, 'epoch': 3} {'type': 'loss', 'content': 0.11972706764936447, 'timestamp': '2025-09-10 02:57:00.802728', 'step': 16928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:00.859529', 'step': 16928, 'epoch': 3} {'type': 'loss', 'content': 0.07218579202890396, 'timestamp': '2025-09-10 02:57:00.861368', 'step': 16929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:00.916092', 'step': 16929, 'epoch': 3} {'type': 'loss', 'content': 0.1643255650997162, 'timestamp': '2025-09-10 02:57:00.918666', 'step': 16930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:00.974163', 'step': 16930, 'epoch': 3} {'type': 'loss', 'content': 0.055241696536540985, 'timestamp': '2025-09-10 02:57:00.976046', 'step': 16931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:01.030879', 'step': 16931, 'epoch': 3} {'type': 'loss', 'content': 0.1052115261554718, 'timestamp': '2025-09-10 02:57:01.037124', 'step': 16932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:57:01.091874', 'step': 16932, 'epoch': 3} {'type': 'loss', 'content': 0.1968645602464676, 'timestamp': '2025-09-10 02:57:01.094093', 'step': 16933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:01.148872', 'step': 16933, 'epoch': 3} {'type': 'loss', 'content': 0.09631162881851196, 'timestamp': '2025-09-10 02:57:01.151226', 'step': 16934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:01.207059', 'step': 16934, 'epoch': 3} {'type': 'loss', 'content': 0.06083949655294418, 'timestamp': '2025-09-10 02:57:01.208783', 'step': 16935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:01.264761', 'step': 16935, 'epoch': 3} {'type': 'loss', 'content': 0.05572594329714775, 'timestamp': '2025-09-10 02:57:01.270548', 'step': 16936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:01.324172', 'step': 16936, 'epoch': 3} {'type': 'loss', 'content': 0.05582163482904434, 'timestamp': '2025-09-10 02:57:01.325817', 'step': 16937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:01.381103', 'step': 16937, 'epoch': 3} {'type': 'loss', 'content': 0.11559047549962997, 'timestamp': '2025-09-10 02:57:01.382885', 'step': 16938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:01.452126', 'step': 16938, 'epoch': 3} {'type': 'loss', 'content': 0.06705901771783829, 'timestamp': '2025-09-10 02:57:01.454226', 'step': 16939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:01.509023', 'step': 16939, 'epoch': 3} {'type': 'loss', 'content': 0.1672869324684143, 'timestamp': '2025-09-10 02:57:01.515089', 'step': 16940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:01.569758', 'step': 16940, 'epoch': 3} {'type': 'loss', 'content': 0.09931706637144089, 'timestamp': '2025-09-10 02:57:01.571879', 'step': 16941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:01.627068', 'step': 16941, 'epoch': 3} {'type': 'loss', 'content': 0.11386425793170929, 'timestamp': '2025-09-10 02:57:01.629080', 'step': 16942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:01.683662', 'step': 16942, 'epoch': 3} {'type': 'loss', 'content': 0.17176735401153564, 'timestamp': '2025-09-10 02:57:01.685435', 'step': 16943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:01.740114', 'step': 16943, 'epoch': 3} {'type': 'loss', 'content': 0.07015853375196457, 'timestamp': '2025-09-10 02:57:01.745861', 'step': 16944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:57:01.800416', 'step': 16944, 'epoch': 3} {'type': 'loss', 'content': 0.14953964948654175, 'timestamp': '2025-09-10 02:57:01.802551', 'step': 16945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:01.856771', 'step': 16945, 'epoch': 3} {'type': 'loss', 'content': 0.08525325357913971, 'timestamp': '2025-09-10 02:57:01.858508', 'step': 16946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:01.914444', 'step': 16946, 'epoch': 3} {'type': 'loss', 'content': 0.0960288941860199, 'timestamp': '2025-09-10 02:57:01.916407', 'step': 16947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:01.972403', 'step': 16947, 'epoch': 3} {'type': 'loss', 'content': 0.08529332280158997, 'timestamp': '2025-09-10 02:57:01.978929', 'step': 16948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:02.034297', 'step': 16948, 'epoch': 3} {'type': 'loss', 'content': 0.06746721267700195, 'timestamp': '2025-09-10 02:57:02.036581', 'step': 16949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:02.091476', 'step': 16949, 'epoch': 3} {'type': 'loss', 'content': 0.10462211072444916, 'timestamp': '2025-09-10 02:57:02.093481', 'step': 16950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:02.148356', 'step': 16950, 'epoch': 3} {'type': 'loss', 'content': 0.020748956128954887, 'timestamp': '2025-09-10 02:57:02.150397', 'step': 16951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:02.205708', 'step': 16951, 'epoch': 3} {'type': 'loss', 'content': 0.06607706844806671, 'timestamp': '2025-09-10 02:57:02.211895', 'step': 16952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:02.266455', 'step': 16952, 'epoch': 3} {'type': 'loss', 'content': 0.17204059660434723, 'timestamp': '2025-09-10 02:57:02.268193', 'step': 16953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:02.323053', 'step': 16953, 'epoch': 3} {'type': 'loss', 'content': 0.09662191569805145, 'timestamp': '2025-09-10 02:57:02.324795', 'step': 16954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:02.379424', 'step': 16954, 'epoch': 3} {'type': 'loss', 'content': 0.08166208118200302, 'timestamp': '2025-09-10 02:57:02.381092', 'step': 16955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:02.436295', 'step': 16955, 'epoch': 3} {'type': 'loss', 'content': 0.12562915682792664, 'timestamp': '2025-09-10 02:57:02.442779', 'step': 16956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:02.498295', 'step': 16956, 'epoch': 3} {'type': 'loss', 'content': 0.10192660242319107, 'timestamp': '2025-09-10 02:57:02.500292', 'step': 16957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:02.556294', 'step': 16957, 'epoch': 3} {'type': 'loss', 'content': 0.11430983990430832, 'timestamp': '2025-09-10 02:57:02.558535', 'step': 16958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:02.613524', 'step': 16958, 'epoch': 3} {'type': 'loss', 'content': 0.19064541161060333, 'timestamp': '2025-09-10 02:57:02.615585', 'step': 16959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:02.670716', 'step': 16959, 'epoch': 3} {'type': 'loss', 'content': 0.06973938643932343, 'timestamp': '2025-09-10 02:57:02.676423', 'step': 16960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:02.734869', 'step': 16960, 'epoch': 3} {'type': 'loss', 'content': 0.16717974841594696, 'timestamp': '2025-09-10 02:57:02.736764', 'step': 16961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:02.792324', 'step': 16961, 'epoch': 3} {'type': 'loss', 'content': 0.07260677218437195, 'timestamp': '2025-09-10 02:57:02.794429', 'step': 16962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:02.855363', 'step': 16962, 'epoch': 3} {'type': 'loss', 'content': 0.10061459988355637, 'timestamp': '2025-09-10 02:57:02.857447', 'step': 16963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:02.913344', 'step': 16963, 'epoch': 3} {'type': 'loss', 'content': 0.04672929644584656, 'timestamp': '2025-09-10 02:57:02.919436', 'step': 16964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:02.974776', 'step': 16964, 'epoch': 3} {'type': 'loss', 'content': 0.10277310013771057, 'timestamp': '2025-09-10 02:57:02.976729', 'step': 16965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:03.032071', 'step': 16965, 'epoch': 3} {'type': 'loss', 'content': 0.015354454517364502, 'timestamp': '2025-09-10 02:57:03.034241', 'step': 16966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:03.090090', 'step': 16966, 'epoch': 3} {'type': 'loss', 'content': 0.1325792372226715, 'timestamp': '2025-09-10 02:57:03.091838', 'step': 16967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:03.146817', 'step': 16967, 'epoch': 3} {'type': 'loss', 'content': 0.08901354670524597, 'timestamp': '2025-09-10 02:57:03.153064', 'step': 16968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:03.207155', 'step': 16968, 'epoch': 3} {'type': 'loss', 'content': 0.0563751757144928, 'timestamp': '2025-09-10 02:57:03.208892', 'step': 16969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:03.263597', 'step': 16969, 'epoch': 3} {'type': 'loss', 'content': 0.09585470706224442, 'timestamp': '2025-09-10 02:57:03.265348', 'step': 16970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:03.320128', 'step': 16970, 'epoch': 3} {'type': 'loss', 'content': 0.09614874422550201, 'timestamp': '2025-09-10 02:57:03.322336', 'step': 16971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:03.377733', 'step': 16971, 'epoch': 3} {'type': 'loss', 'content': 0.09193582087755203, 'timestamp': '2025-09-10 02:57:03.383956', 'step': 16972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:03.437969', 'step': 16972, 'epoch': 3} {'type': 'loss', 'content': 0.23753628134727478, 'timestamp': '2025-09-10 02:57:03.439963', 'step': 16973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:03.494985', 'step': 16973, 'epoch': 3} {'type': 'loss', 'content': 0.06702419370412827, 'timestamp': '2025-09-10 02:57:03.497093', 'step': 16974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:03.552387', 'step': 16974, 'epoch': 3} {'type': 'loss', 'content': 0.10725464671850204, 'timestamp': '2025-09-10 02:57:03.554660', 'step': 16975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:03.610107', 'step': 16975, 'epoch': 3} {'type': 'loss', 'content': 0.06140967831015587, 'timestamp': '2025-09-10 02:57:03.616694', 'step': 16976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:03.671468', 'step': 16976, 'epoch': 3} {'type': 'loss', 'content': 0.08422277867794037, 'timestamp': '2025-09-10 02:57:03.673780', 'step': 16977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:03.729870', 'step': 16977, 'epoch': 3} {'type': 'loss', 'content': 0.18490736186504364, 'timestamp': '2025-09-10 02:57:03.732123', 'step': 16978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:03.787155', 'step': 16978, 'epoch': 3} {'type': 'loss', 'content': 0.08636133372783661, 'timestamp': '2025-09-10 02:57:03.789407', 'step': 16979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:03.844130', 'step': 16979, 'epoch': 3} {'type': 'loss', 'content': 0.06382187455892563, 'timestamp': '2025-09-10 02:57:03.850475', 'step': 16980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:03.905616', 'step': 16980, 'epoch': 3} {'type': 'loss', 'content': 0.06949109584093094, 'timestamp': '2025-09-10 02:57:03.907579', 'step': 16981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:03.962895', 'step': 16981, 'epoch': 3} {'type': 'loss', 'content': 0.08106861263513565, 'timestamp': '2025-09-10 02:57:03.964948', 'step': 16982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:04.019716', 'step': 16982, 'epoch': 3} {'type': 'loss', 'content': 0.08746764808893204, 'timestamp': '2025-09-10 02:57:04.021877', 'step': 16983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:04.078758', 'step': 16983, 'epoch': 3} {'type': 'loss', 'content': 0.13480237126350403, 'timestamp': '2025-09-10 02:57:04.085490', 'step': 16984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:57:04.141570', 'step': 16984, 'epoch': 3} {'type': 'loss', 'content': 0.10774343460798264, 'timestamp': '2025-09-10 02:57:04.144190', 'step': 16985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:04.200036', 'step': 16985, 'epoch': 3} {'type': 'loss', 'content': 0.05900023505091667, 'timestamp': '2025-09-10 02:57:04.202523', 'step': 16986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:04.257566', 'step': 16986, 'epoch': 3} {'type': 'loss', 'content': 0.12965452671051025, 'timestamp': '2025-09-10 02:57:04.259664', 'step': 16987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:04.313891', 'step': 16987, 'epoch': 3} {'type': 'loss', 'content': 0.0848260298371315, 'timestamp': '2025-09-10 02:57:04.320130', 'step': 16988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:04.374079', 'step': 16988, 'epoch': 3} {'type': 'loss', 'content': 0.10500211268663406, 'timestamp': '2025-09-10 02:57:04.376005', 'step': 16989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:04.431123', 'step': 16989, 'epoch': 3} {'type': 'loss', 'content': 0.08633995056152344, 'timestamp': '2025-09-10 02:57:04.433005', 'step': 16990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:04.487559', 'step': 16990, 'epoch': 3} {'type': 'loss', 'content': 0.149839848279953, 'timestamp': '2025-09-10 02:57:04.489836', 'step': 16991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:04.544395', 'step': 16991, 'epoch': 3} {'type': 'loss', 'content': 0.10233258455991745, 'timestamp': '2025-09-10 02:57:04.551149', 'step': 16992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:04.605239', 'step': 16992, 'epoch': 3} {'type': 'loss', 'content': 0.10798186808824539, 'timestamp': '2025-09-10 02:57:04.609468', 'step': 16993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:04.664458', 'step': 16993, 'epoch': 3} {'type': 'loss', 'content': 0.09519613534212112, 'timestamp': '2025-09-10 02:57:04.666322', 'step': 16994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:04.720479', 'step': 16994, 'epoch': 3} {'type': 'loss', 'content': 0.057909492403268814, 'timestamp': '2025-09-10 02:57:04.722614', 'step': 16995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:04.777141', 'step': 16995, 'epoch': 3} {'type': 'loss', 'content': 0.06746824830770493, 'timestamp': '2025-09-10 02:57:04.782892', 'step': 16996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:04.837045', 'step': 16996, 'epoch': 3} {'type': 'loss', 'content': 0.11430755257606506, 'timestamp': '2025-09-10 02:57:04.838717', 'step': 16997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:04.895278', 'step': 16997, 'epoch': 3} {'type': 'loss', 'content': 0.13537797331809998, 'timestamp': '2025-09-10 02:57:04.897017', 'step': 16998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:04.953695', 'step': 16998, 'epoch': 3} {'type': 'loss', 'content': 0.11192204058170319, 'timestamp': '2025-09-10 02:57:04.955424', 'step': 16999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:05.009726', 'step': 16999, 'epoch': 3} {'type': 'loss', 'content': 0.09733380377292633, 'timestamp': '2025-09-10 02:57:05.017821', 'step': 17000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 17000', 'timestamp': '2025-09-10 02:57:05.456519', 'step': 17000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:05.517386', 'step': 17000, 'epoch': 3} {'type': 'loss', 'content': 0.07973628491163254, 'timestamp': '2025-09-10 02:57:05.519378', 'step': 17001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:05.579858', 'step': 17001, 'epoch': 3} {'type': 'loss', 'content': 0.08512604236602783, 'timestamp': '2025-09-10 02:57:05.586336', 'step': 17002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:05.645017', 'step': 17002, 'epoch': 3} {'type': 'loss', 'content': 0.11564376950263977, 'timestamp': '2025-09-10 02:57:05.646895', 'step': 17003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:05.702419', 'step': 17003, 'epoch': 3} {'type': 'loss', 'content': 0.07813398540019989, 'timestamp': '2025-09-10 02:57:05.708362', 'step': 17004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:05.765990', 'step': 17004, 'epoch': 3} {'type': 'loss', 'content': 0.06927897781133652, 'timestamp': '2025-09-10 02:57:05.768336', 'step': 17005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:05.824206', 'step': 17005, 'epoch': 3} {'type': 'loss', 'content': 0.036427900195121765, 'timestamp': '2025-09-10 02:57:05.826570', 'step': 17006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:05.881454', 'step': 17006, 'epoch': 3} {'type': 'loss', 'content': 0.10862351208925247, 'timestamp': '2025-09-10 02:57:05.884189', 'step': 17007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:05.945074', 'step': 17007, 'epoch': 3} {'type': 'loss', 'content': 0.06676772236824036, 'timestamp': '2025-09-10 02:57:05.951396', 'step': 17008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:06.008465', 'step': 17008, 'epoch': 3} {'type': 'loss', 'content': 0.07377300411462784, 'timestamp': '2025-09-10 02:57:06.010793', 'step': 17009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:06.065175', 'step': 17009, 'epoch': 3} {'type': 'loss', 'content': 0.028545556589961052, 'timestamp': '2025-09-10 02:57:06.074615', 'step': 17010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:06.134032', 'step': 17010, 'epoch': 3} {'type': 'loss', 'content': 0.10638468712568283, 'timestamp': '2025-09-10 02:57:06.136397', 'step': 17011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:06.192007', 'step': 17011, 'epoch': 3} {'type': 'loss', 'content': 0.12549570202827454, 'timestamp': '2025-09-10 02:57:06.198432', 'step': 17012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:06.253559', 'step': 17012, 'epoch': 3} {'type': 'loss', 'content': 0.13216754794120789, 'timestamp': '2025-09-10 02:57:06.257146', 'step': 17013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:06.313391', 'step': 17013, 'epoch': 3} {'type': 'loss', 'content': 0.13860811293125153, 'timestamp': '2025-09-10 02:57:06.323563', 'step': 17014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:06.378995', 'step': 17014, 'epoch': 3} {'type': 'loss', 'content': 0.0769285038113594, 'timestamp': '2025-09-10 02:57:06.381166', 'step': 17015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:06.435960', 'step': 17015, 'epoch': 3} {'type': 'loss', 'content': 0.05837484076619148, 'timestamp': '2025-09-10 02:57:06.443426', 'step': 17016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:06.506532', 'step': 17016, 'epoch': 3} {'type': 'loss', 'content': 0.10598891973495483, 'timestamp': '2025-09-10 02:57:06.508693', 'step': 17017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:06.563530', 'step': 17017, 'epoch': 3} {'type': 'loss', 'content': 0.1515105962753296, 'timestamp': '2025-09-10 02:57:06.566483', 'step': 17018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:06.623511', 'step': 17018, 'epoch': 3} {'type': 'loss', 'content': 0.12518002092838287, 'timestamp': '2025-09-10 02:57:06.625658', 'step': 17019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:06.681088', 'step': 17019, 'epoch': 3} {'type': 'loss', 'content': 0.12458735704421997, 'timestamp': '2025-09-10 02:57:06.687602', 'step': 17020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:06.742336', 'step': 17020, 'epoch': 3} {'type': 'loss', 'content': 0.09378904849290848, 'timestamp': '2025-09-10 02:57:06.744538', 'step': 17021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:06.799866', 'step': 17021, 'epoch': 3} {'type': 'loss', 'content': 0.14168228209018707, 'timestamp': '2025-09-10 02:57:06.802061', 'step': 17022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:06.857407', 'step': 17022, 'epoch': 3} {'type': 'loss', 'content': 0.07989834249019623, 'timestamp': '2025-09-10 02:57:06.859669', 'step': 17023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:06.914383', 'step': 17023, 'epoch': 3} {'type': 'loss', 'content': 0.09661674499511719, 'timestamp': '2025-09-10 02:57:06.921181', 'step': 17024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:06.977999', 'step': 17024, 'epoch': 3} {'type': 'loss', 'content': 0.08697504550218582, 'timestamp': '2025-09-10 02:57:06.980192', 'step': 17025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:07.035984', 'step': 17025, 'epoch': 3} {'type': 'loss', 'content': 0.16119414567947388, 'timestamp': '2025-09-10 02:57:07.038137', 'step': 17026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:07.092787', 'step': 17026, 'epoch': 3} {'type': 'loss', 'content': 0.10715334862470627, 'timestamp': '2025-09-10 02:57:07.094865', 'step': 17027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:07.150128', 'step': 17027, 'epoch': 3} {'type': 'loss', 'content': 0.061462968587875366, 'timestamp': '2025-09-10 02:57:07.156264', 'step': 17028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:07.209804', 'step': 17028, 'epoch': 3} {'type': 'loss', 'content': 0.09426318854093552, 'timestamp': '2025-09-10 02:57:07.212052', 'step': 17029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:07.267229', 'step': 17029, 'epoch': 3} {'type': 'loss', 'content': 0.048324067145586014, 'timestamp': '2025-09-10 02:57:07.269564', 'step': 17030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:07.325711', 'step': 17030, 'epoch': 3} {'type': 'loss', 'content': 0.14572831988334656, 'timestamp': '2025-09-10 02:57:07.327923', 'step': 17031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:07.382776', 'step': 17031, 'epoch': 3} {'type': 'loss', 'content': 0.08492662012577057, 'timestamp': '2025-09-10 02:57:07.388828', 'step': 17032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:07.443106', 'step': 17032, 'epoch': 3} {'type': 'loss', 'content': 0.10350995510816574, 'timestamp': '2025-09-10 02:57:07.445447', 'step': 17033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:07.501101', 'step': 17033, 'epoch': 3} {'type': 'loss', 'content': 0.1283034384250641, 'timestamp': '2025-09-10 02:57:07.503336', 'step': 17034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:57:07.559345', 'step': 17034, 'epoch': 3} {'type': 'loss', 'content': 0.13979145884513855, 'timestamp': '2025-09-10 02:57:07.561421', 'step': 17035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:07.616530', 'step': 17035, 'epoch': 3} {'type': 'loss', 'content': 0.1318197399377823, 'timestamp': '2025-09-10 02:57:07.622736', 'step': 17036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:07.678577', 'step': 17036, 'epoch': 3} {'type': 'loss', 'content': 0.1252320408821106, 'timestamp': '2025-09-10 02:57:07.680841', 'step': 17037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:07.736286', 'step': 17037, 'epoch': 3} {'type': 'loss', 'content': 0.09578827768564224, 'timestamp': '2025-09-10 02:57:07.738571', 'step': 17038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:07.794225', 'step': 17038, 'epoch': 3} {'type': 'loss', 'content': 0.04299843683838844, 'timestamp': '2025-09-10 02:57:07.796559', 'step': 17039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:07.852993', 'step': 17039, 'epoch': 3} {'type': 'loss', 'content': 0.07811334729194641, 'timestamp': '2025-09-10 02:57:07.859393', 'step': 17040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:07.914409', 'step': 17040, 'epoch': 3} {'type': 'loss', 'content': 0.05996280163526535, 'timestamp': '2025-09-10 02:57:07.916581', 'step': 17041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:07.971437', 'step': 17041, 'epoch': 3} {'type': 'loss', 'content': 0.08521966636180878, 'timestamp': '2025-09-10 02:57:07.973672', 'step': 17042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:08.029780', 'step': 17042, 'epoch': 3} {'type': 'loss', 'content': 0.07743901759386063, 'timestamp': '2025-09-10 02:57:08.031777', 'step': 17043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:08.087585', 'step': 17043, 'epoch': 3} {'type': 'loss', 'content': 0.08354364335536957, 'timestamp': '2025-09-10 02:57:08.094012', 'step': 17044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:08.150738', 'step': 17044, 'epoch': 3} {'type': 'loss', 'content': 0.08726823329925537, 'timestamp': '2025-09-10 02:57:08.152776', 'step': 17045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:08.207853', 'step': 17045, 'epoch': 3} {'type': 'loss', 'content': 0.10457935184240341, 'timestamp': '2025-09-10 02:57:08.210032', 'step': 17046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:08.265944', 'step': 17046, 'epoch': 3} {'type': 'loss', 'content': 0.08253005146980286, 'timestamp': '2025-09-10 02:57:08.268270', 'step': 17047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:08.322978', 'step': 17047, 'epoch': 3} {'type': 'loss', 'content': 0.21934038400650024, 'timestamp': '2025-09-10 02:57:08.329086', 'step': 17048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:08.382838', 'step': 17048, 'epoch': 3} {'type': 'loss', 'content': 0.09866963326931, 'timestamp': '2025-09-10 02:57:08.384795', 'step': 17049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:08.438586', 'step': 17049, 'epoch': 3} {'type': 'loss', 'content': 0.0710800513625145, 'timestamp': '2025-09-10 02:57:08.440363', 'step': 17050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:08.495219', 'step': 17050, 'epoch': 3} {'type': 'loss', 'content': 0.11686845868825912, 'timestamp': '2025-09-10 02:57:08.497392', 'step': 17051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:08.552855', 'step': 17051, 'epoch': 3} {'type': 'loss', 'content': 0.14503048360347748, 'timestamp': '2025-09-10 02:57:08.559347', 'step': 17052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:08.615011', 'step': 17052, 'epoch': 3} {'type': 'loss', 'content': 0.04038793966174126, 'timestamp': '2025-09-10 02:57:08.617189', 'step': 17053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:08.675790', 'step': 17053, 'epoch': 3} {'type': 'loss', 'content': 0.059315044432878494, 'timestamp': '2025-09-10 02:57:08.678035', 'step': 17054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:08.732697', 'step': 17054, 'epoch': 3} {'type': 'loss', 'content': 0.19823697209358215, 'timestamp': '2025-09-10 02:57:08.734579', 'step': 17055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:08.789634', 'step': 17055, 'epoch': 3} {'type': 'loss', 'content': 0.028451429679989815, 'timestamp': '2025-09-10 02:57:08.795755', 'step': 17056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:08.851930', 'step': 17056, 'epoch': 3} {'type': 'loss', 'content': 0.08500371128320694, 'timestamp': '2025-09-10 02:57:08.854060', 'step': 17057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:08.908091', 'step': 17057, 'epoch': 3} {'type': 'loss', 'content': 0.1822848916053772, 'timestamp': '2025-09-10 02:57:08.910029', 'step': 17058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:08.963161', 'step': 17058, 'epoch': 3} {'type': 'loss', 'content': 0.07420343905687332, 'timestamp': '2025-09-10 02:57:08.965222', 'step': 17059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:09.019808', 'step': 17059, 'epoch': 3} {'type': 'loss', 'content': 0.08082570135593414, 'timestamp': '2025-09-10 02:57:09.025711', 'step': 17060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:09.079344', 'step': 17060, 'epoch': 3} {'type': 'loss', 'content': 0.1181989312171936, 'timestamp': '2025-09-10 02:57:09.081308', 'step': 17061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:09.137693', 'step': 17061, 'epoch': 3} {'type': 'loss', 'content': 0.07809344679117203, 'timestamp': '2025-09-10 02:57:09.139796', 'step': 17062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:09.194903', 'step': 17062, 'epoch': 3} {'type': 'loss', 'content': 0.07517115026712418, 'timestamp': '2025-09-10 02:57:09.196949', 'step': 17063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:09.252014', 'step': 17063, 'epoch': 3} {'type': 'loss', 'content': 0.08450178056955338, 'timestamp': '2025-09-10 02:57:09.258138', 'step': 17064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:09.312278', 'step': 17064, 'epoch': 3} {'type': 'loss', 'content': 0.06991138309240341, 'timestamp': '2025-09-10 02:57:09.314320', 'step': 17065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:09.369590', 'step': 17065, 'epoch': 3} {'type': 'loss', 'content': 0.11330576986074448, 'timestamp': '2025-09-10 02:57:09.371623', 'step': 17066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:09.427816', 'step': 17066, 'epoch': 3} {'type': 'loss', 'content': 0.09060537070035934, 'timestamp': '2025-09-10 02:57:09.429758', 'step': 17067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:09.487033', 'step': 17067, 'epoch': 3} {'type': 'loss', 'content': 0.06616854667663574, 'timestamp': '2025-09-10 02:57:09.493020', 'step': 17068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:09.547827', 'step': 17068, 'epoch': 3} {'type': 'loss', 'content': 0.07968206703662872, 'timestamp': '2025-09-10 02:57:09.549717', 'step': 17069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:09.608608', 'step': 17069, 'epoch': 3} {'type': 'loss', 'content': 0.05815541371703148, 'timestamp': '2025-09-10 02:57:09.610520', 'step': 17070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:09.669222', 'step': 17070, 'epoch': 3} {'type': 'loss', 'content': 0.10366195440292358, 'timestamp': '2025-09-10 02:57:09.671244', 'step': 17071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:09.726521', 'step': 17071, 'epoch': 3} {'type': 'loss', 'content': 0.10141640156507492, 'timestamp': '2025-09-10 02:57:09.732481', 'step': 17072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:57:09.789185', 'step': 17072, 'epoch': 3} {'type': 'loss', 'content': 0.05553600192070007, 'timestamp': '2025-09-10 02:57:09.791068', 'step': 17073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:09.845306', 'step': 17073, 'epoch': 3} {'type': 'loss', 'content': 0.08110778778791428, 'timestamp': '2025-09-10 02:57:09.847267', 'step': 17074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:09.901332', 'step': 17074, 'epoch': 3} {'type': 'loss', 'content': 0.09862504154443741, 'timestamp': '2025-09-10 02:57:09.903310', 'step': 17075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:09.959219', 'step': 17075, 'epoch': 3} {'type': 'loss', 'content': 0.16931816935539246, 'timestamp': '2025-09-10 02:57:09.965545', 'step': 17076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:10.023931', 'step': 17076, 'epoch': 3} {'type': 'loss', 'content': 0.0919552817940712, 'timestamp': '2025-09-10 02:57:10.026023', 'step': 17077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:10.082536', 'step': 17077, 'epoch': 3} {'type': 'loss', 'content': 0.030802281573414803, 'timestamp': '2025-09-10 02:57:10.084413', 'step': 17078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:10.139842', 'step': 17078, 'epoch': 3} {'type': 'loss', 'content': 0.1577947586774826, 'timestamp': '2025-09-10 02:57:10.142006', 'step': 17079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:10.199126', 'step': 17079, 'epoch': 3} {'type': 'loss', 'content': 0.09236322343349457, 'timestamp': '2025-09-10 02:57:10.205303', 'step': 17080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:10.260068', 'step': 17080, 'epoch': 3} {'type': 'loss', 'content': 0.09558199346065521, 'timestamp': '2025-09-10 02:57:10.262052', 'step': 17081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:10.317979', 'step': 17081, 'epoch': 3} {'type': 'loss', 'content': 0.17525620758533478, 'timestamp': '2025-09-10 02:57:10.320107', 'step': 17082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:10.375066', 'step': 17082, 'epoch': 3} {'type': 'loss', 'content': 0.13022178411483765, 'timestamp': '2025-09-10 02:57:10.377064', 'step': 17083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:10.435115', 'step': 17083, 'epoch': 3} {'type': 'loss', 'content': 0.15186354517936707, 'timestamp': '2025-09-10 02:57:10.441229', 'step': 17084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:10.496841', 'step': 17084, 'epoch': 3} {'type': 'loss', 'content': 0.11480596661567688, 'timestamp': '2025-09-10 02:57:10.498814', 'step': 17085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:10.552223', 'step': 17085, 'epoch': 3} {'type': 'loss', 'content': 0.06156061962246895, 'timestamp': '2025-09-10 02:57:10.554208', 'step': 17086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:10.608187', 'step': 17086, 'epoch': 3} {'type': 'loss', 'content': 0.11359678953886032, 'timestamp': '2025-09-10 02:57:10.610172', 'step': 17087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:10.666220', 'step': 17087, 'epoch': 3} {'type': 'loss', 'content': 0.1964457482099533, 'timestamp': '2025-09-10 02:57:10.672273', 'step': 17088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:10.728986', 'step': 17088, 'epoch': 3} {'type': 'loss', 'content': 0.13463428616523743, 'timestamp': '2025-09-10 02:57:10.730968', 'step': 17089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:10.787090', 'step': 17089, 'epoch': 3} {'type': 'loss', 'content': 0.11463727802038193, 'timestamp': '2025-09-10 02:57:10.789027', 'step': 17090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:10.844695', 'step': 17090, 'epoch': 3} {'type': 'loss', 'content': 0.053273770958185196, 'timestamp': '2025-09-10 02:57:10.846850', 'step': 17091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:10.902961', 'step': 17091, 'epoch': 3} {'type': 'loss', 'content': 0.1630723774433136, 'timestamp': '2025-09-10 02:57:10.909289', 'step': 17092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:10.963029', 'step': 17092, 'epoch': 3} {'type': 'loss', 'content': 0.16293483972549438, 'timestamp': '2025-09-10 02:57:10.964995', 'step': 17093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:11.020466', 'step': 17093, 'epoch': 3} {'type': 'loss', 'content': 0.13360944390296936, 'timestamp': '2025-09-10 02:57:11.022362', 'step': 17094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:11.076616', 'step': 17094, 'epoch': 3} {'type': 'loss', 'content': 0.1690286248922348, 'timestamp': '2025-09-10 02:57:11.078596', 'step': 17095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:57:11.134717', 'step': 17095, 'epoch': 3} {'type': 'loss', 'content': 0.09189721196889877, 'timestamp': '2025-09-10 02:57:11.140726', 'step': 17096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:11.194980', 'step': 17096, 'epoch': 3} {'type': 'loss', 'content': 0.09653539955615997, 'timestamp': '2025-09-10 02:57:11.196870', 'step': 17097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:11.251852', 'step': 17097, 'epoch': 3} {'type': 'loss', 'content': 0.07065065950155258, 'timestamp': '2025-09-10 02:57:11.253918', 'step': 17098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:11.311226', 'step': 17098, 'epoch': 3} {'type': 'loss', 'content': 0.04994279518723488, 'timestamp': '2025-09-10 02:57:11.313276', 'step': 17099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:11.368365', 'step': 17099, 'epoch': 3} {'type': 'loss', 'content': 0.1003694012761116, 'timestamp': '2025-09-10 02:57:11.374557', 'step': 17100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:11.429950', 'step': 17100, 'epoch': 3} {'type': 'loss', 'content': 0.05707116425037384, 'timestamp': '2025-09-10 02:57:11.431849', 'step': 17101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:11.486984', 'step': 17101, 'epoch': 3} {'type': 'loss', 'content': 0.21988889575004578, 'timestamp': '2025-09-10 02:57:11.489336', 'step': 17102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:11.545615', 'step': 17102, 'epoch': 3} {'type': 'loss', 'content': 0.043476514518260956, 'timestamp': '2025-09-10 02:57:11.547608', 'step': 17103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:11.603455', 'step': 17103, 'epoch': 3} {'type': 'loss', 'content': 0.07472531497478485, 'timestamp': '2025-09-10 02:57:11.609493', 'step': 17104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:11.663941', 'step': 17104, 'epoch': 3} {'type': 'loss', 'content': 0.1439114511013031, 'timestamp': '2025-09-10 02:57:11.666073', 'step': 17105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:11.721123', 'step': 17105, 'epoch': 3} {'type': 'loss', 'content': 0.12323810160160065, 'timestamp': '2025-09-10 02:57:11.723300', 'step': 17106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:11.778501', 'step': 17106, 'epoch': 3} {'type': 'loss', 'content': 0.06908868253231049, 'timestamp': '2025-09-10 02:57:11.780780', 'step': 17107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:11.837588', 'step': 17107, 'epoch': 3} {'type': 'loss', 'content': 0.2124759405851364, 'timestamp': '2025-09-10 02:57:11.844092', 'step': 17108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:11.900356', 'step': 17108, 'epoch': 3} {'type': 'loss', 'content': 0.05306814983487129, 'timestamp': '2025-09-10 02:57:11.902306', 'step': 17109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:11.957260', 'step': 17109, 'epoch': 3} {'type': 'loss', 'content': 0.10513759404420853, 'timestamp': '2025-09-10 02:57:11.959168', 'step': 17110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:12.014052', 'step': 17110, 'epoch': 3} {'type': 'loss', 'content': 0.07128769159317017, 'timestamp': '2025-09-10 02:57:12.016018', 'step': 17111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:12.071997', 'step': 17111, 'epoch': 3} {'type': 'loss', 'content': 0.0691702589392662, 'timestamp': '2025-09-10 02:57:12.078091', 'step': 17112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:12.132638', 'step': 17112, 'epoch': 3} {'type': 'loss', 'content': 0.08573399484157562, 'timestamp': '2025-09-10 02:57:12.134594', 'step': 17113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:12.188250', 'step': 17113, 'epoch': 3} {'type': 'loss', 'content': 0.09133805334568024, 'timestamp': '2025-09-10 02:57:12.190247', 'step': 17114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:12.246002', 'step': 17114, 'epoch': 3} {'type': 'loss', 'content': 0.11014539003372192, 'timestamp': '2025-09-10 02:57:12.248140', 'step': 17115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:12.303014', 'step': 17115, 'epoch': 3} {'type': 'loss', 'content': 0.11189049482345581, 'timestamp': '2025-09-10 02:57:12.309425', 'step': 17116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:12.364259', 'step': 17116, 'epoch': 3} {'type': 'loss', 'content': 0.06458751857280731, 'timestamp': '2025-09-10 02:57:12.366233', 'step': 17117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:12.420175', 'step': 17117, 'epoch': 3} {'type': 'loss', 'content': 0.027088267728686333, 'timestamp': '2025-09-10 02:57:12.422287', 'step': 17118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:12.478636', 'step': 17118, 'epoch': 3} {'type': 'loss', 'content': 0.0871080681681633, 'timestamp': '2025-09-10 02:57:12.480867', 'step': 17119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:12.536929', 'step': 17119, 'epoch': 3} {'type': 'loss', 'content': 0.10030602663755417, 'timestamp': '2025-09-10 02:57:12.543158', 'step': 17120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:12.598472', 'step': 17120, 'epoch': 3} {'type': 'loss', 'content': 0.04230784997344017, 'timestamp': '2025-09-10 02:57:12.600450', 'step': 17121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:57:12.654925', 'step': 17121, 'epoch': 3} {'type': 'loss', 'content': 0.08335979282855988, 'timestamp': '2025-09-10 02:57:12.657078', 'step': 17122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:12.712097', 'step': 17122, 'epoch': 3} {'type': 'loss', 'content': 0.03520822525024414, 'timestamp': '2025-09-10 02:57:12.714135', 'step': 17123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:12.769419', 'step': 17123, 'epoch': 3} {'type': 'loss', 'content': 0.06243366003036499, 'timestamp': '2025-09-10 02:57:12.775447', 'step': 17124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:12.832344', 'step': 17124, 'epoch': 3} {'type': 'loss', 'content': 0.0831088125705719, 'timestamp': '2025-09-10 02:57:12.834276', 'step': 17125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:12.897409', 'step': 17125, 'epoch': 3} {'type': 'loss', 'content': 0.15187224745750427, 'timestamp': '2025-09-10 02:57:12.899424', 'step': 17126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:12.953924', 'step': 17126, 'epoch': 3} {'type': 'loss', 'content': 0.11166364699602127, 'timestamp': '2025-09-10 02:57:12.955964', 'step': 17127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:13.009684', 'step': 17127, 'epoch': 3} {'type': 'loss', 'content': 0.08475792407989502, 'timestamp': '2025-09-10 02:57:13.015701', 'step': 17128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:13.072390', 'step': 17128, 'epoch': 3} {'type': 'loss', 'content': 0.07040141522884369, 'timestamp': '2025-09-10 02:57:13.074319', 'step': 17129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:13.130277', 'step': 17129, 'epoch': 3} {'type': 'loss', 'content': 0.15266729891300201, 'timestamp': '2025-09-10 02:57:13.132171', 'step': 17130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:13.186513', 'step': 17130, 'epoch': 3} {'type': 'loss', 'content': 0.20339412987232208, 'timestamp': '2025-09-10 02:57:13.188525', 'step': 17131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:13.242066', 'step': 17131, 'epoch': 3} {'type': 'loss', 'content': 0.16142085194587708, 'timestamp': '2025-09-10 02:57:13.248079', 'step': 17132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:13.301510', 'step': 17132, 'epoch': 3} {'type': 'loss', 'content': 0.05259048938751221, 'timestamp': '2025-09-10 02:57:13.303579', 'step': 17133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:13.358148', 'step': 17133, 'epoch': 3} {'type': 'loss', 'content': 0.12462028861045837, 'timestamp': '2025-09-10 02:57:13.360321', 'step': 17134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:13.419459', 'step': 17134, 'epoch': 3} {'type': 'loss', 'content': 0.07619702070951462, 'timestamp': '2025-09-10 02:57:13.421593', 'step': 17135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:13.480185', 'step': 17135, 'epoch': 3} {'type': 'loss', 'content': 0.15454982221126556, 'timestamp': '2025-09-10 02:57:13.486351', 'step': 17136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:13.542658', 'step': 17136, 'epoch': 3} {'type': 'loss', 'content': 0.02688201516866684, 'timestamp': '2025-09-10 02:57:13.544657', 'step': 17137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:13.600143', 'step': 17137, 'epoch': 3} {'type': 'loss', 'content': 0.17965874075889587, 'timestamp': '2025-09-10 02:57:13.602262', 'step': 17138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:13.659459', 'step': 17138, 'epoch': 3} {'type': 'loss', 'content': 0.07843178510665894, 'timestamp': '2025-09-10 02:57:13.661460', 'step': 17139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:57:13.717679', 'step': 17139, 'epoch': 3} {'type': 'loss', 'content': 0.10367296636104584, 'timestamp': '2025-09-10 02:57:13.723947', 'step': 17140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:13.779735', 'step': 17140, 'epoch': 3} {'type': 'loss', 'content': 0.062029171735048294, 'timestamp': '2025-09-10 02:57:13.781814', 'step': 17141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:13.835969', 'step': 17141, 'epoch': 3} {'type': 'loss', 'content': 0.04442110285162926, 'timestamp': '2025-09-10 02:57:13.837973', 'step': 17142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:13.894226', 'step': 17142, 'epoch': 3} {'type': 'loss', 'content': 0.14719344675540924, 'timestamp': '2025-09-10 02:57:13.896349', 'step': 17143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:13.952901', 'step': 17143, 'epoch': 3} {'type': 'loss', 'content': 0.14245419204235077, 'timestamp': '2025-09-10 02:57:13.959512', 'step': 17144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:14.015484', 'step': 17144, 'epoch': 3} {'type': 'loss', 'content': 0.086048923432827, 'timestamp': '2025-09-10 02:57:14.017692', 'step': 17145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:14.072605', 'step': 17145, 'epoch': 3} {'type': 'loss', 'content': 0.15399718284606934, 'timestamp': '2025-09-10 02:57:14.074645', 'step': 17146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:14.131822', 'step': 17146, 'epoch': 3} {'type': 'loss', 'content': 0.09697780758142471, 'timestamp': '2025-09-10 02:57:14.133858', 'step': 17147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:14.188396', 'step': 17147, 'epoch': 3} {'type': 'loss', 'content': 0.08958816528320312, 'timestamp': '2025-09-10 02:57:14.194699', 'step': 17148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:14.248664', 'step': 17148, 'epoch': 3} {'type': 'loss', 'content': 0.0704772025346756, 'timestamp': '2025-09-10 02:57:14.251028', 'step': 17149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:14.308725', 'step': 17149, 'epoch': 3} {'type': 'loss', 'content': 0.07748255133628845, 'timestamp': '2025-09-10 02:57:14.310745', 'step': 17150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:14.369014', 'step': 17150, 'epoch': 3} {'type': 'loss', 'content': 0.11135462671518326, 'timestamp': '2025-09-10 02:57:14.371085', 'step': 17151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:14.425809', 'step': 17151, 'epoch': 3} {'type': 'loss', 'content': 0.12098951637744904, 'timestamp': '2025-09-10 02:57:14.431869', 'step': 17152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:14.490614', 'step': 17152, 'epoch': 3} {'type': 'loss', 'content': 0.07164589315652847, 'timestamp': '2025-09-10 02:57:14.492688', 'step': 17153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:14.549786', 'step': 17153, 'epoch': 3} {'type': 'loss', 'content': 0.120611272752285, 'timestamp': '2025-09-10 02:57:14.551805', 'step': 17154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:14.606532', 'step': 17154, 'epoch': 3} {'type': 'loss', 'content': 0.09840869158506393, 'timestamp': '2025-09-10 02:57:14.608512', 'step': 17155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:14.664539', 'step': 17155, 'epoch': 3} {'type': 'loss', 'content': 0.07189061492681503, 'timestamp': '2025-09-10 02:57:14.670734', 'step': 17156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:14.727809', 'step': 17156, 'epoch': 3} {'type': 'loss', 'content': 0.13604237139225006, 'timestamp': '2025-09-10 02:57:14.729821', 'step': 17157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:14.784003', 'step': 17157, 'epoch': 3} {'type': 'loss', 'content': 0.11263430863618851, 'timestamp': '2025-09-10 02:57:14.786058', 'step': 17158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:14.862538', 'step': 17158, 'epoch': 3} {'type': 'loss', 'content': 0.07761362195014954, 'timestamp': '2025-09-10 02:57:14.864612', 'step': 17159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:14.921319', 'step': 17159, 'epoch': 3} {'type': 'loss', 'content': 0.05178427696228027, 'timestamp': '2025-09-10 02:57:14.927335', 'step': 17160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:14.990787', 'step': 17160, 'epoch': 3} {'type': 'loss', 'content': 0.15885144472122192, 'timestamp': '2025-09-10 02:57:14.992822', 'step': 17161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:15.055477', 'step': 17161, 'epoch': 3} {'type': 'loss', 'content': 0.10113315284252167, 'timestamp': '2025-09-10 02:57:15.057556', 'step': 17162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:15.112602', 'step': 17162, 'epoch': 3} {'type': 'loss', 'content': 0.1565655916929245, 'timestamp': '2025-09-10 02:57:15.114709', 'step': 17163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:15.169446', 'step': 17163, 'epoch': 3} {'type': 'loss', 'content': 0.026702089235186577, 'timestamp': '2025-09-10 02:57:15.175540', 'step': 17164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:15.235249', 'step': 17164, 'epoch': 3} {'type': 'loss', 'content': 0.1687747687101364, 'timestamp': '2025-09-10 02:57:15.237423', 'step': 17165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:57:15.296509', 'step': 17165, 'epoch': 3} {'type': 'loss', 'content': 0.1308702677488327, 'timestamp': '2025-09-10 02:57:15.298530', 'step': 17166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:15.354156', 'step': 17166, 'epoch': 3} {'type': 'loss', 'content': 0.08683227747678757, 'timestamp': '2025-09-10 02:57:15.356116', 'step': 17167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:15.412195', 'step': 17167, 'epoch': 3} {'type': 'loss', 'content': 0.06451769173145294, 'timestamp': '2025-09-10 02:57:15.418245', 'step': 17168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:15.473144', 'step': 17168, 'epoch': 3} {'type': 'loss', 'content': 0.11768276244401932, 'timestamp': '2025-09-10 02:57:15.478662', 'step': 17169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:15.540395', 'step': 17169, 'epoch': 3} {'type': 'loss', 'content': 0.0992114320397377, 'timestamp': '2025-09-10 02:57:15.546217', 'step': 17170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:57:15.611073', 'step': 17170, 'epoch': 3} {'type': 'loss', 'content': 0.053097933530807495, 'timestamp': '2025-09-10 02:57:15.613051', 'step': 17171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:15.678734', 'step': 17171, 'epoch': 3} {'type': 'loss', 'content': 0.10664926469326019, 'timestamp': '2025-09-10 02:57:15.685774', 'step': 17172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:15.743072', 'step': 17172, 'epoch': 3} {'type': 'loss', 'content': 0.07835769653320312, 'timestamp': '2025-09-10 02:57:15.748448', 'step': 17173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:15.804098', 'step': 17173, 'epoch': 3} {'type': 'loss', 'content': 0.11358284205198288, 'timestamp': '2025-09-10 02:57:15.806052', 'step': 17174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:57:15.860020', 'step': 17174, 'epoch': 3} {'type': 'loss', 'content': 0.09072145074605942, 'timestamp': '2025-09-10 02:57:15.862065', 'step': 17175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:15.916390', 'step': 17175, 'epoch': 3} {'type': 'loss', 'content': 0.04855217784643173, 'timestamp': '2025-09-10 02:57:15.922294', 'step': 17176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:15.975193', 'step': 17176, 'epoch': 3} {'type': 'loss', 'content': 0.15809503197669983, 'timestamp': '2025-09-10 02:57:15.978199', 'step': 17177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:16.038245', 'step': 17177, 'epoch': 3} {'type': 'loss', 'content': 0.03594610467553139, 'timestamp': '2025-09-10 02:57:16.040331', 'step': 17178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:16.094071', 'step': 17178, 'epoch': 3} {'type': 'loss', 'content': 0.15694546699523926, 'timestamp': '2025-09-10 02:57:16.096004', 'step': 17179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:16.149234', 'step': 17179, 'epoch': 3} {'type': 'loss', 'content': 0.05981115251779556, 'timestamp': '2025-09-10 02:57:16.155324', 'step': 17180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:16.208752', 'step': 17180, 'epoch': 3} {'type': 'loss', 'content': 0.1150183379650116, 'timestamp': '2025-09-10 02:57:16.210990', 'step': 17181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:16.264907', 'step': 17181, 'epoch': 3} {'type': 'loss', 'content': 0.11904354393482208, 'timestamp': '2025-09-10 02:57:16.267041', 'step': 17182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:16.321639', 'step': 17182, 'epoch': 3} {'type': 'loss', 'content': 0.06343294680118561, 'timestamp': '2025-09-10 02:57:16.323529', 'step': 17183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:16.378368', 'step': 17183, 'epoch': 3} {'type': 'loss', 'content': 0.08720050752162933, 'timestamp': '2025-09-10 02:57:16.384641', 'step': 17184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:16.438661', 'step': 17184, 'epoch': 3} {'type': 'loss', 'content': 0.08236533403396606, 'timestamp': '2025-09-10 02:57:16.440605', 'step': 17185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:16.494337', 'step': 17185, 'epoch': 3} {'type': 'loss', 'content': 0.11289476603269577, 'timestamp': '2025-09-10 02:57:16.496349', 'step': 17186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:16.550179', 'step': 17186, 'epoch': 3} {'type': 'loss', 'content': 0.1235254779458046, 'timestamp': '2025-09-10 02:57:16.552211', 'step': 17187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:16.605673', 'step': 17187, 'epoch': 3} {'type': 'loss', 'content': 0.03236369043588638, 'timestamp': '2025-09-10 02:57:16.611635', 'step': 17188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:16.664964', 'step': 17188, 'epoch': 3} {'type': 'loss', 'content': 0.15979987382888794, 'timestamp': '2025-09-10 02:57:16.666968', 'step': 17189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:16.720212', 'step': 17189, 'epoch': 3} {'type': 'loss', 'content': 0.058216266334056854, 'timestamp': '2025-09-10 02:57:16.722178', 'step': 17190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:16.775707', 'step': 17190, 'epoch': 3} {'type': 'loss', 'content': 0.12269017845392227, 'timestamp': '2025-09-10 02:57:16.777850', 'step': 17191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:16.832305', 'step': 17191, 'epoch': 3} {'type': 'loss', 'content': 0.113224096596241, 'timestamp': '2025-09-10 02:57:16.838525', 'step': 17192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:16.891797', 'step': 17192, 'epoch': 3} {'type': 'loss', 'content': 0.09135396033525467, 'timestamp': '2025-09-10 02:57:16.893810', 'step': 17193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:16.948199', 'step': 17193, 'epoch': 3} {'type': 'loss', 'content': 0.06814645230770111, 'timestamp': '2025-09-10 02:57:16.950137', 'step': 17194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:17.003867', 'step': 17194, 'epoch': 3} {'type': 'loss', 'content': 0.09983329474925995, 'timestamp': '2025-09-10 02:57:17.005953', 'step': 17195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:17.060777', 'step': 17195, 'epoch': 3} {'type': 'loss', 'content': 0.1540372222661972, 'timestamp': '2025-09-10 02:57:17.066882', 'step': 17196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:17.120090', 'step': 17196, 'epoch': 3} {'type': 'loss', 'content': 0.08928127586841583, 'timestamp': '2025-09-10 02:57:17.122087', 'step': 17197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:17.177009', 'step': 17197, 'epoch': 3} {'type': 'loss', 'content': 0.11598562449216843, 'timestamp': '2025-09-10 02:57:17.178984', 'step': 17198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:17.233548', 'step': 17198, 'epoch': 3} {'type': 'loss', 'content': 0.09285968542098999, 'timestamp': '2025-09-10 02:57:17.235712', 'step': 17199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:17.289165', 'step': 17199, 'epoch': 3} {'type': 'loss', 'content': 0.1512957215309143, 'timestamp': '2025-09-10 02:57:17.295111', 'step': 17200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:17.347928', 'step': 17200, 'epoch': 3} {'type': 'loss', 'content': 0.1330115646123886, 'timestamp': '2025-09-10 02:57:17.349857', 'step': 17201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:17.404571', 'step': 17201, 'epoch': 3} {'type': 'loss', 'content': 0.08328676223754883, 'timestamp': '2025-09-10 02:57:17.406566', 'step': 17202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:17.462422', 'step': 17202, 'epoch': 3} {'type': 'loss', 'content': 0.14655908942222595, 'timestamp': '2025-09-10 02:57:17.464395', 'step': 17203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:17.519020', 'step': 17203, 'epoch': 3} {'type': 'loss', 'content': 0.11482429504394531, 'timestamp': '2025-09-10 02:57:17.525101', 'step': 17204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:17.578631', 'step': 17204, 'epoch': 3} {'type': 'loss', 'content': 0.08769232034683228, 'timestamp': '2025-09-10 02:57:17.580789', 'step': 17205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:17.634987', 'step': 17205, 'epoch': 3} {'type': 'loss', 'content': 0.05669722706079483, 'timestamp': '2025-09-10 02:57:17.637050', 'step': 17206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:17.694391', 'step': 17206, 'epoch': 3} {'type': 'loss', 'content': 0.04826327785849571, 'timestamp': '2025-09-10 02:57:17.696371', 'step': 17207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:17.756694', 'step': 17207, 'epoch': 3} {'type': 'loss', 'content': 0.13274942338466644, 'timestamp': '2025-09-10 02:57:17.762799', 'step': 17208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:17.818891', 'step': 17208, 'epoch': 3} {'type': 'loss', 'content': 0.1176656037569046, 'timestamp': '2025-09-10 02:57:17.820878', 'step': 17209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:17.875688', 'step': 17209, 'epoch': 3} {'type': 'loss', 'content': 0.08676358312368393, 'timestamp': '2025-09-10 02:57:17.877591', 'step': 17210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:17.934403', 'step': 17210, 'epoch': 3} {'type': 'loss', 'content': 0.1447017788887024, 'timestamp': '2025-09-10 02:57:17.936418', 'step': 17211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:17.990517', 'step': 17211, 'epoch': 3} {'type': 'loss', 'content': 0.09684395045042038, 'timestamp': '2025-09-10 02:57:17.996508', 'step': 17212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:18.051513', 'step': 17212, 'epoch': 3} {'type': 'loss', 'content': 0.11262999475002289, 'timestamp': '2025-09-10 02:57:18.053511', 'step': 17213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:18.107022', 'step': 17213, 'epoch': 3} {'type': 'loss', 'content': 0.10822480171918869, 'timestamp': '2025-09-10 02:57:18.109009', 'step': 17214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:18.162707', 'step': 17214, 'epoch': 3} {'type': 'loss', 'content': 0.12177587300539017, 'timestamp': '2025-09-10 02:57:18.164700', 'step': 17215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:18.221626', 'step': 17215, 'epoch': 3} {'type': 'loss', 'content': 0.022965870797634125, 'timestamp': '2025-09-10 02:57:18.227731', 'step': 17216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:18.281612', 'step': 17216, 'epoch': 3} {'type': 'loss', 'content': 0.0871492251753807, 'timestamp': '2025-09-10 02:57:18.283502', 'step': 17217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:18.338431', 'step': 17217, 'epoch': 3} {'type': 'loss', 'content': 0.0528595931828022, 'timestamp': '2025-09-10 02:57:18.340349', 'step': 17218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:18.394627', 'step': 17218, 'epoch': 3} {'type': 'loss', 'content': 0.08216047286987305, 'timestamp': '2025-09-10 02:57:18.396555', 'step': 17219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:18.449757', 'step': 17219, 'epoch': 3} {'type': 'loss', 'content': 0.08749252557754517, 'timestamp': '2025-09-10 02:57:18.455795', 'step': 17220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:18.509360', 'step': 17220, 'epoch': 3} {'type': 'loss', 'content': 0.04656798392534256, 'timestamp': '2025-09-10 02:57:18.511479', 'step': 17221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:18.564800', 'step': 17221, 'epoch': 3} {'type': 'loss', 'content': 0.04639872908592224, 'timestamp': '2025-09-10 02:57:18.566853', 'step': 17222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:18.621513', 'step': 17222, 'epoch': 3} {'type': 'loss', 'content': 0.13723017275333405, 'timestamp': '2025-09-10 02:57:18.623566', 'step': 17223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:18.678496', 'step': 17223, 'epoch': 3} {'type': 'loss', 'content': 0.1041967049241066, 'timestamp': '2025-09-10 02:57:18.684309', 'step': 17224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:18.738797', 'step': 17224, 'epoch': 3} {'type': 'loss', 'content': 0.1113828718662262, 'timestamp': '2025-09-10 02:57:18.740731', 'step': 17225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:18.798812', 'step': 17225, 'epoch': 3} {'type': 'loss', 'content': 0.09565684199333191, 'timestamp': '2025-09-10 02:57:18.800761', 'step': 17226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:18.856350', 'step': 17226, 'epoch': 3} {'type': 'loss', 'content': 0.04948718100786209, 'timestamp': '2025-09-10 02:57:18.858317', 'step': 17227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:18.911490', 'step': 17227, 'epoch': 3} {'type': 'loss', 'content': 0.1581205278635025, 'timestamp': '2025-09-10 02:57:18.917385', 'step': 17228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:18.971090', 'step': 17228, 'epoch': 3} {'type': 'loss', 'content': 0.10613296926021576, 'timestamp': '2025-09-10 02:57:18.973082', 'step': 17229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:19.026132', 'step': 17229, 'epoch': 3} {'type': 'loss', 'content': 0.14008261263370514, 'timestamp': '2025-09-10 02:57:19.028144', 'step': 17230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:19.082416', 'step': 17230, 'epoch': 3} {'type': 'loss', 'content': 0.035058680921792984, 'timestamp': '2025-09-10 02:57:19.084478', 'step': 17231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:19.138079', 'step': 17231, 'epoch': 3} {'type': 'loss', 'content': 0.035093579441308975, 'timestamp': '2025-09-10 02:57:19.144045', 'step': 17232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:19.199271', 'step': 17232, 'epoch': 3} {'type': 'loss', 'content': 0.07005267590284348, 'timestamp': '2025-09-10 02:57:19.201510', 'step': 17233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:19.254937', 'step': 17233, 'epoch': 3} {'type': 'loss', 'content': 0.07424420863389969, 'timestamp': '2025-09-10 02:57:19.257061', 'step': 17234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:19.310880', 'step': 17234, 'epoch': 3} {'type': 'loss', 'content': 0.06037144735455513, 'timestamp': '2025-09-10 02:57:19.312943', 'step': 17235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:57:19.366004', 'step': 17235, 'epoch': 3} {'type': 'loss', 'content': 0.056796204298734665, 'timestamp': '2025-09-10 02:57:19.371809', 'step': 17236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:19.424426', 'step': 17236, 'epoch': 3} {'type': 'loss', 'content': 0.08230741322040558, 'timestamp': '2025-09-10 02:57:19.426625', 'step': 17237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:19.480363', 'step': 17237, 'epoch': 3} {'type': 'loss', 'content': 0.05497058480978012, 'timestamp': '2025-09-10 02:57:19.482361', 'step': 17238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:19.536772', 'step': 17238, 'epoch': 3} {'type': 'loss', 'content': 0.09817817062139511, 'timestamp': '2025-09-10 02:57:19.538762', 'step': 17239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:57:19.594199', 'step': 17239, 'epoch': 3} {'type': 'loss', 'content': 0.03836057707667351, 'timestamp': '2025-09-10 02:57:19.600006', 'step': 17240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:19.652912', 'step': 17240, 'epoch': 3} {'type': 'loss', 'content': 0.12845492362976074, 'timestamp': '2025-09-10 02:57:19.654878', 'step': 17241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:19.708749', 'step': 17241, 'epoch': 3} {'type': 'loss', 'content': 0.08670549094676971, 'timestamp': '2025-09-10 02:57:19.710673', 'step': 17242, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:57:32.441967', 'step': 17242, 'epoch': 3} {'type': 'pplx', 'content': 10961.35350527495, 'timestamp': '2025-09-10 02:57:32.444821', 'step': 17242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:32.499805', 'step': 17242, 'epoch': 3} {'type': 'loss', 'content': 0.07871949672698975, 'timestamp': '2025-09-10 02:57:32.502175', 'step': 17243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:32.559263', 'step': 17243, 'epoch': 3} {'type': 'loss', 'content': 0.17553424835205078, 'timestamp': '2025-09-10 02:57:32.565535', 'step': 17244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:32.618937', 'step': 17244, 'epoch': 3} {'type': 'loss', 'content': 0.15087105333805084, 'timestamp': '2025-09-10 02:57:32.621138', 'step': 17245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:32.676415', 'step': 17245, 'epoch': 3} {'type': 'loss', 'content': 0.04493702948093414, 'timestamp': '2025-09-10 02:57:32.678556', 'step': 17246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:32.733285', 'step': 17246, 'epoch': 3} {'type': 'loss', 'content': 0.08250433951616287, 'timestamp': '2025-09-10 02:57:32.735545', 'step': 17247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:32.788498', 'step': 17247, 'epoch': 3} {'type': 'loss', 'content': 0.06651624292135239, 'timestamp': '2025-09-10 02:57:32.795891', 'step': 17248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:32.849090', 'step': 17248, 'epoch': 3} {'type': 'loss', 'content': 0.10493931174278259, 'timestamp': '2025-09-10 02:57:32.851182', 'step': 17249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:32.904060', 'step': 17249, 'epoch': 3} {'type': 'loss', 'content': 0.04586242511868477, 'timestamp': '2025-09-10 02:57:32.906304', 'step': 17250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:32.965809', 'step': 17250, 'epoch': 3} {'type': 'loss', 'content': 0.07024537026882172, 'timestamp': '2025-09-10 02:57:32.967902', 'step': 17251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:33.020778', 'step': 17251, 'epoch': 3} {'type': 'loss', 'content': 0.20572695136070251, 'timestamp': '2025-09-10 02:57:33.026718', 'step': 17252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:33.080515', 'step': 17252, 'epoch': 3} {'type': 'loss', 'content': 0.07300135493278503, 'timestamp': '2025-09-10 02:57:33.082643', 'step': 17253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:33.137453', 'step': 17253, 'epoch': 3} {'type': 'loss', 'content': 0.12988175451755524, 'timestamp': '2025-09-10 02:57:33.139552', 'step': 17254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:33.192843', 'step': 17254, 'epoch': 3} {'type': 'loss', 'content': 0.10799606144428253, 'timestamp': '2025-09-10 02:57:33.194996', 'step': 17255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:33.259981', 'step': 17255, 'epoch': 3} {'type': 'loss', 'content': 0.11480940133333206, 'timestamp': '2025-09-10 02:57:33.265984', 'step': 17256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:33.319609', 'step': 17256, 'epoch': 3} {'type': 'loss', 'content': 0.07925336062908173, 'timestamp': '2025-09-10 02:57:33.322609', 'step': 17257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:33.381990', 'step': 17257, 'epoch': 3} {'type': 'loss', 'content': 0.06602738052606583, 'timestamp': '2025-09-10 02:57:33.384707', 'step': 17258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:33.449788', 'step': 17258, 'epoch': 3} {'type': 'loss', 'content': 0.04517878219485283, 'timestamp': '2025-09-10 02:57:33.453050', 'step': 17259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:33.508768', 'step': 17259, 'epoch': 3} {'type': 'loss', 'content': 0.13159537315368652, 'timestamp': '2025-09-10 02:57:33.515417', 'step': 17260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:33.570529', 'step': 17260, 'epoch': 3} {'type': 'loss', 'content': 0.07346935570240021, 'timestamp': '2025-09-10 02:57:33.572954', 'step': 17261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:33.628690', 'step': 17261, 'epoch': 3} {'type': 'loss', 'content': 0.11137902736663818, 'timestamp': '2025-09-10 02:57:33.633008', 'step': 17262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:33.689126', 'step': 17262, 'epoch': 3} {'type': 'loss', 'content': 0.11830684542655945, 'timestamp': '2025-09-10 02:57:33.691881', 'step': 17263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:33.752460', 'step': 17263, 'epoch': 3} {'type': 'loss', 'content': 0.08966941386461258, 'timestamp': '2025-09-10 02:57:33.760614', 'step': 17264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:33.814029', 'step': 17264, 'epoch': 3} {'type': 'loss', 'content': 0.11067230999469757, 'timestamp': '2025-09-10 02:57:33.818859', 'step': 17265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:33.873369', 'step': 17265, 'epoch': 3} {'type': 'loss', 'content': 0.08495695888996124, 'timestamp': '2025-09-10 02:57:33.881731', 'step': 17266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:33.943252', 'step': 17266, 'epoch': 3} {'type': 'loss', 'content': 0.045022282749414444, 'timestamp': '2025-09-10 02:57:33.946685', 'step': 17267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:34.006154', 'step': 17267, 'epoch': 3} {'type': 'loss', 'content': 0.06190299242734909, 'timestamp': '2025-09-10 02:57:34.012309', 'step': 17268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:34.065267', 'step': 17268, 'epoch': 3} {'type': 'loss', 'content': 0.12999163568019867, 'timestamp': '2025-09-10 02:57:34.067443', 'step': 17269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:34.121166', 'step': 17269, 'epoch': 3} {'type': 'loss', 'content': 0.052469752728939056, 'timestamp': '2025-09-10 02:57:34.124331', 'step': 17270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:34.177955', 'step': 17270, 'epoch': 3} {'type': 'loss', 'content': 0.15668605268001556, 'timestamp': '2025-09-10 02:57:34.182657', 'step': 17271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:34.244774', 'step': 17271, 'epoch': 3} {'type': 'loss', 'content': 0.055376287549734116, 'timestamp': '2025-09-10 02:57:34.253508', 'step': 17272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:34.311188', 'step': 17272, 'epoch': 3} {'type': 'loss', 'content': 0.07948260009288788, 'timestamp': '2025-09-10 02:57:34.313491', 'step': 17273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:34.372673', 'step': 17273, 'epoch': 3} {'type': 'loss', 'content': 0.04288897663354874, 'timestamp': '2025-09-10 02:57:34.375283', 'step': 17274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:34.431905', 'step': 17274, 'epoch': 3} {'type': 'loss', 'content': 0.10911925137042999, 'timestamp': '2025-09-10 02:57:34.434200', 'step': 17275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:34.487205', 'step': 17275, 'epoch': 3} {'type': 'loss', 'content': 0.058839473873376846, 'timestamp': '2025-09-10 02:57:34.493867', 'step': 17276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:34.548660', 'step': 17276, 'epoch': 3} {'type': 'loss', 'content': 0.09206510335206985, 'timestamp': '2025-09-10 02:57:34.551389', 'step': 17277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:34.606049', 'step': 17277, 'epoch': 3} {'type': 'loss', 'content': 0.07413160055875778, 'timestamp': '2025-09-10 02:57:34.609205', 'step': 17278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:34.665268', 'step': 17278, 'epoch': 3} {'type': 'loss', 'content': 0.08135464787483215, 'timestamp': '2025-09-10 02:57:34.667661', 'step': 17279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:34.721392', 'step': 17279, 'epoch': 3} {'type': 'loss', 'content': 0.10953738540410995, 'timestamp': '2025-09-10 02:57:34.733431', 'step': 17280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:34.786376', 'step': 17280, 'epoch': 3} {'type': 'loss', 'content': 0.07681528478860855, 'timestamp': '2025-09-10 02:57:34.795404', 'step': 17281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:34.849676', 'step': 17281, 'epoch': 3} {'type': 'loss', 'content': 0.1152862086892128, 'timestamp': '2025-09-10 02:57:34.851964', 'step': 17282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:34.905129', 'step': 17282, 'epoch': 3} {'type': 'loss', 'content': 0.12385614216327667, 'timestamp': '2025-09-10 02:57:34.907310', 'step': 17283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:57:34.961591', 'step': 17283, 'epoch': 3} {'type': 'loss', 'content': 0.2518462836742401, 'timestamp': '2025-09-10 02:57:34.967453', 'step': 17284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:35.020431', 'step': 17284, 'epoch': 3} {'type': 'loss', 'content': 0.12544558942317963, 'timestamp': '2025-09-10 02:57:35.022539', 'step': 17285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:35.075263', 'step': 17285, 'epoch': 3} {'type': 'loss', 'content': 0.15470632910728455, 'timestamp': '2025-09-10 02:57:35.077511', 'step': 17286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:35.130592', 'step': 17286, 'epoch': 3} {'type': 'loss', 'content': 0.1551969051361084, 'timestamp': '2025-09-10 02:57:35.132989', 'step': 17287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:35.185376', 'step': 17287, 'epoch': 3} {'type': 'loss', 'content': 0.048064544796943665, 'timestamp': '2025-09-10 02:57:35.191299', 'step': 17288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:35.244097', 'step': 17288, 'epoch': 3} {'type': 'loss', 'content': 0.13684193789958954, 'timestamp': '2025-09-10 02:57:35.246398', 'step': 17289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:35.299092', 'step': 17289, 'epoch': 3} {'type': 'loss', 'content': 0.05137275159358978, 'timestamp': '2025-09-10 02:57:35.301351', 'step': 17290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:35.354710', 'step': 17290, 'epoch': 3} {'type': 'loss', 'content': 0.03612642362713814, 'timestamp': '2025-09-10 02:57:35.356887', 'step': 17291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:35.411212', 'step': 17291, 'epoch': 3} {'type': 'loss', 'content': 0.06712624430656433, 'timestamp': '2025-09-10 02:57:35.417343', 'step': 17292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:35.471126', 'step': 17292, 'epoch': 3} {'type': 'loss', 'content': 0.10877768695354462, 'timestamp': '2025-09-10 02:57:35.473371', 'step': 17293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:35.526941', 'step': 17293, 'epoch': 3} {'type': 'loss', 'content': 0.10328248888254166, 'timestamp': '2025-09-10 02:57:35.529108', 'step': 17294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:35.582836', 'step': 17294, 'epoch': 3} {'type': 'loss', 'content': 0.05490227788686752, 'timestamp': '2025-09-10 02:57:35.585129', 'step': 17295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:35.638353', 'step': 17295, 'epoch': 3} {'type': 'loss', 'content': 0.12205956876277924, 'timestamp': '2025-09-10 02:57:35.644192', 'step': 17296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:35.696529', 'step': 17296, 'epoch': 3} {'type': 'loss', 'content': 0.09559281170368195, 'timestamp': '2025-09-10 02:57:35.698704', 'step': 17297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:35.751803', 'step': 17297, 'epoch': 3} {'type': 'loss', 'content': 0.11059143394231796, 'timestamp': '2025-09-10 02:57:35.753681', 'step': 17298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:35.806959', 'step': 17298, 'epoch': 3} {'type': 'loss', 'content': 0.07752718776464462, 'timestamp': '2025-09-10 02:57:35.809080', 'step': 17299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:35.862344', 'step': 17299, 'epoch': 3} {'type': 'loss', 'content': 0.0607440248131752, 'timestamp': '2025-09-10 02:57:35.868431', 'step': 17300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:35.922986', 'step': 17300, 'epoch': 3} {'type': 'loss', 'content': 0.08231835067272186, 'timestamp': '2025-09-10 02:57:35.925277', 'step': 17301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:35.979707', 'step': 17301, 'epoch': 3} {'type': 'loss', 'content': 0.11938166618347168, 'timestamp': '2025-09-10 02:57:35.981817', 'step': 17302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:36.035002', 'step': 17302, 'epoch': 3} {'type': 'loss', 'content': 0.12203675508499146, 'timestamp': '2025-09-10 02:57:36.037119', 'step': 17303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:36.089572', 'step': 17303, 'epoch': 3} {'type': 'loss', 'content': 0.14872680604457855, 'timestamp': '2025-09-10 02:57:36.095397', 'step': 17304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:36.148133', 'step': 17304, 'epoch': 3} {'type': 'loss', 'content': 0.08236877620220184, 'timestamp': '2025-09-10 02:57:36.150185', 'step': 17305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:36.203551', 'step': 17305, 'epoch': 3} {'type': 'loss', 'content': 0.1110026016831398, 'timestamp': '2025-09-10 02:57:36.205661', 'step': 17306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:36.258918', 'step': 17306, 'epoch': 3} {'type': 'loss', 'content': 0.11467695236206055, 'timestamp': '2025-09-10 02:57:36.261083', 'step': 17307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:36.313903', 'step': 17307, 'epoch': 3} {'type': 'loss', 'content': 0.04852008819580078, 'timestamp': '2025-09-10 02:57:36.319678', 'step': 17308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:36.373169', 'step': 17308, 'epoch': 3} {'type': 'loss', 'content': 0.1122092753648758, 'timestamp': '2025-09-10 02:57:36.375438', 'step': 17309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:36.429207', 'step': 17309, 'epoch': 3} {'type': 'loss', 'content': 0.12482572346925735, 'timestamp': '2025-09-10 02:57:36.431283', 'step': 17310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:36.485622', 'step': 17310, 'epoch': 3} {'type': 'loss', 'content': 0.14526952803134918, 'timestamp': '2025-09-10 02:57:36.487824', 'step': 17311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:36.541932', 'step': 17311, 'epoch': 3} {'type': 'loss', 'content': 0.1401103138923645, 'timestamp': '2025-09-10 02:57:36.547961', 'step': 17312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:36.601086', 'step': 17312, 'epoch': 3} {'type': 'loss', 'content': 0.15729421377182007, 'timestamp': '2025-09-10 02:57:36.602759', 'step': 17313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:36.655920', 'step': 17313, 'epoch': 3} {'type': 'loss', 'content': 0.07390383630990982, 'timestamp': '2025-09-10 02:57:36.658207', 'step': 17314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:36.711578', 'step': 17314, 'epoch': 3} {'type': 'loss', 'content': 0.13075599074363708, 'timestamp': '2025-09-10 02:57:36.713825', 'step': 17315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:36.768093', 'step': 17315, 'epoch': 3} {'type': 'loss', 'content': 0.10518646240234375, 'timestamp': '2025-09-10 02:57:36.774481', 'step': 17316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:36.829821', 'step': 17316, 'epoch': 3} {'type': 'loss', 'content': 0.060702309012413025, 'timestamp': '2025-09-10 02:57:36.832073', 'step': 17317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:36.885521', 'step': 17317, 'epoch': 3} {'type': 'loss', 'content': 0.06676718592643738, 'timestamp': '2025-09-10 02:57:36.887686', 'step': 17318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:36.942183', 'step': 17318, 'epoch': 3} {'type': 'loss', 'content': 0.1323617696762085, 'timestamp': '2025-09-10 02:57:36.944305', 'step': 17319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:36.998396', 'step': 17319, 'epoch': 3} {'type': 'loss', 'content': 0.06223880127072334, 'timestamp': '2025-09-10 02:57:37.004679', 'step': 17320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:37.057781', 'step': 17320, 'epoch': 3} {'type': 'loss', 'content': 0.11143555492162704, 'timestamp': '2025-09-10 02:57:37.059999', 'step': 17321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:37.115183', 'step': 17321, 'epoch': 3} {'type': 'loss', 'content': 0.06041714549064636, 'timestamp': '2025-09-10 02:57:37.117332', 'step': 17322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:37.171622', 'step': 17322, 'epoch': 3} {'type': 'loss', 'content': 0.0999620333313942, 'timestamp': '2025-09-10 02:57:37.173766', 'step': 17323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:37.227421', 'step': 17323, 'epoch': 3} {'type': 'loss', 'content': 0.0904458612203598, 'timestamp': '2025-09-10 02:57:37.233594', 'step': 17324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:37.286410', 'step': 17324, 'epoch': 3} {'type': 'loss', 'content': 0.05434511974453926, 'timestamp': '2025-09-10 02:57:37.288557', 'step': 17325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:37.341863', 'step': 17325, 'epoch': 3} {'type': 'loss', 'content': 0.07042961567640305, 'timestamp': '2025-09-10 02:57:37.344098', 'step': 17326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:37.397505', 'step': 17326, 'epoch': 3} {'type': 'loss', 'content': 0.08868616074323654, 'timestamp': '2025-09-10 02:57:37.399795', 'step': 17327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:37.452986', 'step': 17327, 'epoch': 3} {'type': 'loss', 'content': 0.04987398535013199, 'timestamp': '2025-09-10 02:57:37.458809', 'step': 17328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:37.512619', 'step': 17328, 'epoch': 3} {'type': 'loss', 'content': 0.13831111788749695, 'timestamp': '2025-09-10 02:57:37.514872', 'step': 17329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:37.568570', 'step': 17329, 'epoch': 3} {'type': 'loss', 'content': 0.06423252075910568, 'timestamp': '2025-09-10 02:57:37.570838', 'step': 17330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:37.624173', 'step': 17330, 'epoch': 3} {'type': 'loss', 'content': 0.17365384101867676, 'timestamp': '2025-09-10 02:57:37.626185', 'step': 17331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:37.679608', 'step': 17331, 'epoch': 3} {'type': 'loss', 'content': 0.11632585525512695, 'timestamp': '2025-09-10 02:57:37.685699', 'step': 17332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:37.739651', 'step': 17332, 'epoch': 3} {'type': 'loss', 'content': 0.06434404104948044, 'timestamp': '2025-09-10 02:57:37.741782', 'step': 17333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:37.794601', 'step': 17333, 'epoch': 3} {'type': 'loss', 'content': 0.067268967628479, 'timestamp': '2025-09-10 02:57:37.796813', 'step': 17334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:37.850121', 'step': 17334, 'epoch': 3} {'type': 'loss', 'content': 0.0760839581489563, 'timestamp': '2025-09-10 02:57:37.852364', 'step': 17335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:37.905683', 'step': 17335, 'epoch': 3} {'type': 'loss', 'content': 0.13300484418869019, 'timestamp': '2025-09-10 02:57:37.911603', 'step': 17336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:37.963881', 'step': 17336, 'epoch': 3} {'type': 'loss', 'content': 0.10034102201461792, 'timestamp': '2025-09-10 02:57:37.965983', 'step': 17337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:38.018931', 'step': 17337, 'epoch': 3} {'type': 'loss', 'content': 0.18224500119686127, 'timestamp': '2025-09-10 02:57:38.021099', 'step': 17338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:38.074306', 'step': 17338, 'epoch': 3} {'type': 'loss', 'content': 0.09359864890575409, 'timestamp': '2025-09-10 02:57:38.076530', 'step': 17339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:38.129511', 'step': 17339, 'epoch': 3} {'type': 'loss', 'content': 0.05572831630706787, 'timestamp': '2025-09-10 02:57:38.135553', 'step': 17340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:38.188366', 'step': 17340, 'epoch': 3} {'type': 'loss', 'content': 0.13709527254104614, 'timestamp': '2025-09-10 02:57:38.190411', 'step': 17341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:38.245469', 'step': 17341, 'epoch': 3} {'type': 'loss', 'content': 0.07557724416255951, 'timestamp': '2025-09-10 02:57:38.247486', 'step': 17342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:38.302845', 'step': 17342, 'epoch': 3} {'type': 'loss', 'content': 0.10965967178344727, 'timestamp': '2025-09-10 02:57:38.305201', 'step': 17343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:38.361569', 'step': 17343, 'epoch': 3} {'type': 'loss', 'content': 0.04187672957777977, 'timestamp': '2025-09-10 02:57:38.368047', 'step': 17344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:38.423963', 'step': 17344, 'epoch': 3} {'type': 'loss', 'content': 0.11626428365707397, 'timestamp': '2025-09-10 02:57:38.426182', 'step': 17345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:38.482104', 'step': 17345, 'epoch': 3} {'type': 'loss', 'content': 0.10732237994670868, 'timestamp': '2025-09-10 02:57:38.484321', 'step': 17346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:38.540660', 'step': 17346, 'epoch': 3} {'type': 'loss', 'content': 0.05542541295289993, 'timestamp': '2025-09-10 02:57:38.542736', 'step': 17347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:38.599525', 'step': 17347, 'epoch': 3} {'type': 'loss', 'content': 0.16134637594223022, 'timestamp': '2025-09-10 02:57:38.606117', 'step': 17348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:38.663502', 'step': 17348, 'epoch': 3} {'type': 'loss', 'content': 0.05931219458580017, 'timestamp': '2025-09-10 02:57:38.665693', 'step': 17349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:38.721785', 'step': 17349, 'epoch': 3} {'type': 'loss', 'content': 0.08647018671035767, 'timestamp': '2025-09-10 02:57:38.723691', 'step': 17350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:38.778257', 'step': 17350, 'epoch': 3} {'type': 'loss', 'content': 0.13849537074565887, 'timestamp': '2025-09-10 02:57:38.780277', 'step': 17351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:38.833900', 'step': 17351, 'epoch': 3} {'type': 'loss', 'content': 0.06727338582277298, 'timestamp': '2025-09-10 02:57:38.839912', 'step': 17352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:38.892460', 'step': 17352, 'epoch': 3} {'type': 'loss', 'content': 0.08423231542110443, 'timestamp': '2025-09-10 02:57:38.894531', 'step': 17353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:38.947220', 'step': 17353, 'epoch': 3} {'type': 'loss', 'content': 0.06428182870149612, 'timestamp': '2025-09-10 02:57:38.949487', 'step': 17354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:39.002778', 'step': 17354, 'epoch': 3} {'type': 'loss', 'content': 0.2093621790409088, 'timestamp': '2025-09-10 02:57:39.004674', 'step': 17355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:39.058155', 'step': 17355, 'epoch': 3} {'type': 'loss', 'content': 0.06788189709186554, 'timestamp': '2025-09-10 02:57:39.064025', 'step': 17356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:39.119098', 'step': 17356, 'epoch': 3} {'type': 'loss', 'content': 0.05779457837343216, 'timestamp': '2025-09-10 02:57:39.121110', 'step': 17357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:39.174178', 'step': 17357, 'epoch': 3} {'type': 'loss', 'content': 0.1076650619506836, 'timestamp': '2025-09-10 02:57:39.176300', 'step': 17358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:39.230074', 'step': 17358, 'epoch': 3} {'type': 'loss', 'content': 0.09674528241157532, 'timestamp': '2025-09-10 02:57:39.232122', 'step': 17359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:39.285405', 'step': 17359, 'epoch': 3} {'type': 'loss', 'content': 0.13789397478103638, 'timestamp': '2025-09-10 02:57:39.291466', 'step': 17360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:39.344376', 'step': 17360, 'epoch': 3} {'type': 'loss', 'content': 0.09066379070281982, 'timestamp': '2025-09-10 02:57:39.346290', 'step': 17361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:39.398915', 'step': 17361, 'epoch': 3} {'type': 'loss', 'content': 0.11529179662466049, 'timestamp': '2025-09-10 02:57:39.401129', 'step': 17362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:39.454191', 'step': 17362, 'epoch': 3} {'type': 'loss', 'content': 0.07497721910476685, 'timestamp': '2025-09-10 02:57:39.456466', 'step': 17363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:39.509748', 'step': 17363, 'epoch': 3} {'type': 'loss', 'content': 0.13142316043376923, 'timestamp': '2025-09-10 02:57:39.515655', 'step': 17364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:39.568247', 'step': 17364, 'epoch': 3} {'type': 'loss', 'content': 0.009773989208042622, 'timestamp': '2025-09-10 02:57:39.570439', 'step': 17365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:39.623794', 'step': 17365, 'epoch': 3} {'type': 'loss', 'content': 0.1142163798213005, 'timestamp': '2025-09-10 02:57:39.625921', 'step': 17366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:39.685015', 'step': 17366, 'epoch': 3} {'type': 'loss', 'content': 0.03463476523756981, 'timestamp': '2025-09-10 02:57:39.687405', 'step': 17367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:39.747699', 'step': 17367, 'epoch': 3} {'type': 'loss', 'content': 0.13902227580547333, 'timestamp': '2025-09-10 02:57:39.754873', 'step': 17368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:39.813935', 'step': 17368, 'epoch': 3} {'type': 'loss', 'content': 0.06488347798585892, 'timestamp': '2025-09-10 02:57:39.816143', 'step': 17369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:39.875390', 'step': 17369, 'epoch': 3} {'type': 'loss', 'content': 0.09153147041797638, 'timestamp': '2025-09-10 02:57:39.877664', 'step': 17370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:39.939033', 'step': 17370, 'epoch': 3} {'type': 'loss', 'content': 0.05129006877541542, 'timestamp': '2025-09-10 02:57:39.941476', 'step': 17371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:39.999636', 'step': 17371, 'epoch': 3} {'type': 'loss', 'content': 0.09658785909414291, 'timestamp': '2025-09-10 02:57:40.006407', 'step': 17372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:40.061233', 'step': 17372, 'epoch': 3} {'type': 'loss', 'content': 0.09803138673305511, 'timestamp': '2025-09-10 02:57:40.063585', 'step': 17373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:40.116834', 'step': 17373, 'epoch': 3} {'type': 'loss', 'content': 0.04878351092338562, 'timestamp': '2025-09-10 02:57:40.118971', 'step': 17374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:40.172479', 'step': 17374, 'epoch': 3} {'type': 'loss', 'content': 0.13122591376304626, 'timestamp': '2025-09-10 02:57:40.174656', 'step': 17375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:40.228920', 'step': 17375, 'epoch': 3} {'type': 'loss', 'content': 0.12218631058931351, 'timestamp': '2025-09-10 02:57:40.234970', 'step': 17376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:40.287615', 'step': 17376, 'epoch': 3} {'type': 'loss', 'content': 0.049818940460681915, 'timestamp': '2025-09-10 02:57:40.289742', 'step': 17377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:40.343603', 'step': 17377, 'epoch': 3} {'type': 'loss', 'content': 0.06639568507671356, 'timestamp': '2025-09-10 02:57:40.345701', 'step': 17378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:40.399844', 'step': 17378, 'epoch': 3} {'type': 'loss', 'content': 0.08071155846118927, 'timestamp': '2025-09-10 02:57:40.402090', 'step': 17379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:40.455510', 'step': 17379, 'epoch': 3} {'type': 'loss', 'content': 0.06130344793200493, 'timestamp': '2025-09-10 02:57:40.461335', 'step': 17380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:40.513926', 'step': 17380, 'epoch': 3} {'type': 'loss', 'content': 0.09917879104614258, 'timestamp': '2025-09-10 02:57:40.516173', 'step': 17381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:40.570083', 'step': 17381, 'epoch': 3} {'type': 'loss', 'content': 0.07182870805263519, 'timestamp': '2025-09-10 02:57:40.572160', 'step': 17382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:40.625054', 'step': 17382, 'epoch': 3} {'type': 'loss', 'content': 0.14072397351264954, 'timestamp': '2025-09-10 02:57:40.627191', 'step': 17383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:40.680311', 'step': 17383, 'epoch': 3} {'type': 'loss', 'content': 0.0726543739438057, 'timestamp': '2025-09-10 02:57:40.686081', 'step': 17384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:40.738886', 'step': 17384, 'epoch': 3} {'type': 'loss', 'content': 0.09963500499725342, 'timestamp': '2025-09-10 02:57:40.740987', 'step': 17385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:40.795062', 'step': 17385, 'epoch': 3} {'type': 'loss', 'content': 0.06928155571222305, 'timestamp': '2025-09-10 02:57:40.797312', 'step': 17386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:40.851422', 'step': 17386, 'epoch': 3} {'type': 'loss', 'content': 0.059499602764844894, 'timestamp': '2025-09-10 02:57:40.853647', 'step': 17387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:40.906763', 'step': 17387, 'epoch': 3} {'type': 'loss', 'content': 0.15845593810081482, 'timestamp': '2025-09-10 02:57:40.912503', 'step': 17388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:40.964675', 'step': 17388, 'epoch': 3} {'type': 'loss', 'content': 0.043072741478681564, 'timestamp': '2025-09-10 02:57:40.966784', 'step': 17389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:41.020291', 'step': 17389, 'epoch': 3} {'type': 'loss', 'content': 0.24020743370056152, 'timestamp': '2025-09-10 02:57:41.022451', 'step': 17390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:41.075610', 'step': 17390, 'epoch': 3} {'type': 'loss', 'content': 0.08804403990507126, 'timestamp': '2025-09-10 02:57:41.077479', 'step': 17391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:41.129922', 'step': 17391, 'epoch': 3} {'type': 'loss', 'content': 0.07731705904006958, 'timestamp': '2025-09-10 02:57:41.135532', 'step': 17392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:41.187948', 'step': 17392, 'epoch': 3} {'type': 'loss', 'content': 0.04946494102478027, 'timestamp': '2025-09-10 02:57:41.189928', 'step': 17393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:41.242817', 'step': 17393, 'epoch': 3} {'type': 'loss', 'content': 0.12384121119976044, 'timestamp': '2025-09-10 02:57:41.244950', 'step': 17394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:41.299046', 'step': 17394, 'epoch': 3} {'type': 'loss', 'content': 0.05343247950077057, 'timestamp': '2025-09-10 02:57:41.301063', 'step': 17395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:41.354223', 'step': 17395, 'epoch': 3} {'type': 'loss', 'content': 0.12183115631341934, 'timestamp': '2025-09-10 02:57:41.361270', 'step': 17396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:41.413422', 'step': 17396, 'epoch': 3} {'type': 'loss', 'content': 0.11938846111297607, 'timestamp': '2025-09-10 02:57:41.415482', 'step': 17397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:41.468080', 'step': 17397, 'epoch': 3} {'type': 'loss', 'content': 0.06393526494503021, 'timestamp': '2025-09-10 02:57:41.470214', 'step': 17398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:41.524134', 'step': 17398, 'epoch': 3} {'type': 'loss', 'content': 0.06986338645219803, 'timestamp': '2025-09-10 02:57:41.526467', 'step': 17399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:41.581771', 'step': 17399, 'epoch': 3} {'type': 'loss', 'content': 0.06614424288272858, 'timestamp': '2025-09-10 02:57:41.587556', 'step': 17400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:41.639923', 'step': 17400, 'epoch': 3} {'type': 'loss', 'content': 0.13665062189102173, 'timestamp': '2025-09-10 02:57:41.642125', 'step': 17401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:41.694817', 'step': 17401, 'epoch': 3} {'type': 'loss', 'content': 0.06997496634721756, 'timestamp': '2025-09-10 02:57:41.697057', 'step': 17402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:41.750057', 'step': 17402, 'epoch': 3} {'type': 'loss', 'content': 0.12121286988258362, 'timestamp': '2025-09-10 02:57:41.752304', 'step': 17403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:41.805975', 'step': 17403, 'epoch': 3} {'type': 'loss', 'content': 0.09986954927444458, 'timestamp': '2025-09-10 02:57:41.811760', 'step': 17404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:41.864835', 'step': 17404, 'epoch': 3} {'type': 'loss', 'content': 0.12173005938529968, 'timestamp': '2025-09-10 02:57:41.866972', 'step': 17405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:41.919773', 'step': 17405, 'epoch': 3} {'type': 'loss', 'content': 0.09434513747692108, 'timestamp': '2025-09-10 02:57:41.922185', 'step': 17406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:41.975431', 'step': 17406, 'epoch': 3} {'type': 'loss', 'content': 0.05971384420990944, 'timestamp': '2025-09-10 02:57:41.977784', 'step': 17407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:42.031158', 'step': 17407, 'epoch': 3} {'type': 'loss', 'content': 0.03624032437801361, 'timestamp': '2025-09-10 02:57:42.037012', 'step': 17408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:42.095921', 'step': 17408, 'epoch': 3} {'type': 'loss', 'content': 0.10516630113124847, 'timestamp': '2025-09-10 02:57:42.097787', 'step': 17409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:42.153344', 'step': 17409, 'epoch': 3} {'type': 'loss', 'content': 0.08454923331737518, 'timestamp': '2025-09-10 02:57:42.155401', 'step': 17410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:42.208448', 'step': 17410, 'epoch': 3} {'type': 'loss', 'content': 0.10926574468612671, 'timestamp': '2025-09-10 02:57:42.210627', 'step': 17411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:42.265373', 'step': 17411, 'epoch': 3} {'type': 'loss', 'content': 0.07377104461193085, 'timestamp': '2025-09-10 02:57:42.275153', 'step': 17412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:42.328645', 'step': 17412, 'epoch': 3} {'type': 'loss', 'content': 0.032506491988897324, 'timestamp': '2025-09-10 02:57:42.330825', 'step': 17413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:42.384445', 'step': 17413, 'epoch': 3} {'type': 'loss', 'content': 0.17447924613952637, 'timestamp': '2025-09-10 02:57:42.393657', 'step': 17414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:42.451786', 'step': 17414, 'epoch': 3} {'type': 'loss', 'content': 0.12634386122226715, 'timestamp': '2025-09-10 02:57:42.454719', 'step': 17415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:42.508432', 'step': 17415, 'epoch': 3} {'type': 'loss', 'content': 0.11996972560882568, 'timestamp': '2025-09-10 02:57:42.514385', 'step': 17416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:42.566834', 'step': 17416, 'epoch': 3} {'type': 'loss', 'content': 0.07766365259885788, 'timestamp': '2025-09-10 02:57:42.569073', 'step': 17417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:57:42.622807', 'step': 17417, 'epoch': 3} {'type': 'loss', 'content': 0.10608259588479996, 'timestamp': '2025-09-10 02:57:42.627464', 'step': 17418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:42.681368', 'step': 17418, 'epoch': 3} {'type': 'loss', 'content': 0.0702415481209755, 'timestamp': '2025-09-10 02:57:42.683447', 'step': 17419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:42.737458', 'step': 17419, 'epoch': 3} {'type': 'loss', 'content': 0.09101252257823944, 'timestamp': '2025-09-10 02:57:42.743469', 'step': 17420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:42.795714', 'step': 17420, 'epoch': 3} {'type': 'loss', 'content': 0.11538773030042648, 'timestamp': '2025-09-10 02:57:42.797946', 'step': 17421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:42.852592', 'step': 17421, 'epoch': 3} {'type': 'loss', 'content': 0.10647638887166977, 'timestamp': '2025-09-10 02:57:42.854829', 'step': 17422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:42.907953', 'step': 17422, 'epoch': 3} {'type': 'loss', 'content': 0.04745544120669365, 'timestamp': '2025-09-10 02:57:42.910207', 'step': 17423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:42.963698', 'step': 17423, 'epoch': 3} {'type': 'loss', 'content': 0.043291036039590836, 'timestamp': '2025-09-10 02:57:42.969433', 'step': 17424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:43.021556', 'step': 17424, 'epoch': 3} {'type': 'loss', 'content': 0.10799073427915573, 'timestamp': '2025-09-10 02:57:43.023457', 'step': 17425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:43.076820', 'step': 17425, 'epoch': 3} {'type': 'loss', 'content': 0.04425180330872536, 'timestamp': '2025-09-10 02:57:43.078932', 'step': 17426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:43.134246', 'step': 17426, 'epoch': 3} {'type': 'loss', 'content': 0.03799986094236374, 'timestamp': '2025-09-10 02:57:43.136541', 'step': 17427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:43.189944', 'step': 17427, 'epoch': 3} {'type': 'loss', 'content': 0.16348546743392944, 'timestamp': '2025-09-10 02:57:43.195767', 'step': 17428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:43.248137', 'step': 17428, 'epoch': 3} {'type': 'loss', 'content': 0.044960759580135345, 'timestamp': '2025-09-10 02:57:43.250649', 'step': 17429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:43.303281', 'step': 17429, 'epoch': 3} {'type': 'loss', 'content': 0.0603659525513649, 'timestamp': '2025-09-10 02:57:43.305959', 'step': 17430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:43.359691', 'step': 17430, 'epoch': 3} {'type': 'loss', 'content': 0.13985934853553772, 'timestamp': '2025-09-10 02:57:43.361876', 'step': 17431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:43.415492', 'step': 17431, 'epoch': 3} {'type': 'loss', 'content': 0.07815974950790405, 'timestamp': '2025-09-10 02:57:43.421613', 'step': 17432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:43.477142', 'step': 17432, 'epoch': 3} {'type': 'loss', 'content': 0.17307625710964203, 'timestamp': '2025-09-10 02:57:43.479315', 'step': 17433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:43.534179', 'step': 17433, 'epoch': 3} {'type': 'loss', 'content': 0.09938978403806686, 'timestamp': '2025-09-10 02:57:43.539401', 'step': 17434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:43.595569', 'step': 17434, 'epoch': 3} {'type': 'loss', 'content': 0.09867110103368759, 'timestamp': '2025-09-10 02:57:43.597774', 'step': 17435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:43.651858', 'step': 17435, 'epoch': 3} {'type': 'loss', 'content': 0.0881892740726471, 'timestamp': '2025-09-10 02:57:43.660103', 'step': 17436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:43.722700', 'step': 17436, 'epoch': 3} {'type': 'loss', 'content': 0.12657278776168823, 'timestamp': '2025-09-10 02:57:43.724990', 'step': 17437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:43.778322', 'step': 17437, 'epoch': 3} {'type': 'loss', 'content': 0.05036478489637375, 'timestamp': '2025-09-10 02:57:43.780662', 'step': 17438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:43.834240', 'step': 17438, 'epoch': 3} {'type': 'loss', 'content': 0.08357849717140198, 'timestamp': '2025-09-10 02:57:43.838718', 'step': 17439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:43.893729', 'step': 17439, 'epoch': 3} {'type': 'loss', 'content': 0.08476056158542633, 'timestamp': '2025-09-10 02:57:43.899714', 'step': 17440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:43.954810', 'step': 17440, 'epoch': 3} {'type': 'loss', 'content': 0.11214062571525574, 'timestamp': '2025-09-10 02:57:43.957348', 'step': 17441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:44.010365', 'step': 17441, 'epoch': 3} {'type': 'loss', 'content': 0.047305233776569366, 'timestamp': '2025-09-10 02:57:44.012567', 'step': 17442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:44.072777', 'step': 17442, 'epoch': 3} {'type': 'loss', 'content': 0.058252960443496704, 'timestamp': '2025-09-10 02:57:44.076132', 'step': 17443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:44.137707', 'step': 17443, 'epoch': 3} {'type': 'loss', 'content': 0.07404668629169464, 'timestamp': '2025-09-10 02:57:44.143955', 'step': 17444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:44.196748', 'step': 17444, 'epoch': 3} {'type': 'loss', 'content': 0.10586188733577728, 'timestamp': '2025-09-10 02:57:44.199210', 'step': 17445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:44.252611', 'step': 17445, 'epoch': 3} {'type': 'loss', 'content': 0.08321864157915115, 'timestamp': '2025-09-10 02:57:44.254735', 'step': 17446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:44.308741', 'step': 17446, 'epoch': 3} {'type': 'loss', 'content': 0.07366321980953217, 'timestamp': '2025-09-10 02:57:44.310969', 'step': 17447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:44.366605', 'step': 17447, 'epoch': 3} {'type': 'loss', 'content': 0.11865618824958801, 'timestamp': '2025-09-10 02:57:44.372537', 'step': 17448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:44.428089', 'step': 17448, 'epoch': 3} {'type': 'loss', 'content': 0.12619845569133759, 'timestamp': '2025-09-10 02:57:44.430315', 'step': 17449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:44.492956', 'step': 17449, 'epoch': 3} {'type': 'loss', 'content': 0.1305924355983734, 'timestamp': '2025-09-10 02:57:44.495161', 'step': 17450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:44.548307', 'step': 17450, 'epoch': 3} {'type': 'loss', 'content': 0.11330726742744446, 'timestamp': '2025-09-10 02:57:44.550527', 'step': 17451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:44.603217', 'step': 17451, 'epoch': 3} {'type': 'loss', 'content': 0.10536214709281921, 'timestamp': '2025-09-10 02:57:44.609156', 'step': 17452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:44.662282', 'step': 17452, 'epoch': 3} {'type': 'loss', 'content': 0.06879962980747223, 'timestamp': '2025-09-10 02:57:44.664482', 'step': 17453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:44.718102', 'step': 17453, 'epoch': 3} {'type': 'loss', 'content': 0.0793616846203804, 'timestamp': '2025-09-10 02:57:44.720310', 'step': 17454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:44.774333', 'step': 17454, 'epoch': 3} {'type': 'loss', 'content': 0.12326022237539291, 'timestamp': '2025-09-10 02:57:44.776545', 'step': 17455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:44.830662', 'step': 17455, 'epoch': 3} {'type': 'loss', 'content': 0.1194501519203186, 'timestamp': '2025-09-10 02:57:44.836679', 'step': 17456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:44.888956', 'step': 17456, 'epoch': 3} {'type': 'loss', 'content': 0.041283268481492996, 'timestamp': '2025-09-10 02:57:44.891122', 'step': 17457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:44.943929', 'step': 17457, 'epoch': 3} {'type': 'loss', 'content': 0.06258939951658249, 'timestamp': '2025-09-10 02:57:44.946543', 'step': 17458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:45.000016', 'step': 17458, 'epoch': 3} {'type': 'loss', 'content': 0.07115714251995087, 'timestamp': '2025-09-10 02:57:45.002551', 'step': 17459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:45.055487', 'step': 17459, 'epoch': 3} {'type': 'loss', 'content': 0.060468386858701706, 'timestamp': '2025-09-10 02:57:45.061394', 'step': 17460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:45.115004', 'step': 17460, 'epoch': 3} {'type': 'loss', 'content': 0.13425947725772858, 'timestamp': '2025-09-10 02:57:45.117356', 'step': 17461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:45.170679', 'step': 17461, 'epoch': 3} {'type': 'loss', 'content': 0.06696972250938416, 'timestamp': '2025-09-10 02:57:45.172800', 'step': 17462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:45.226672', 'step': 17462, 'epoch': 3} {'type': 'loss', 'content': 0.05571353808045387, 'timestamp': '2025-09-10 02:57:45.228920', 'step': 17463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:45.282410', 'step': 17463, 'epoch': 3} {'type': 'loss', 'content': 0.11038883030414581, 'timestamp': '2025-09-10 02:57:45.288571', 'step': 17464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:45.341183', 'step': 17464, 'epoch': 3} {'type': 'loss', 'content': 0.12491834908723831, 'timestamp': '2025-09-10 02:57:45.343405', 'step': 17465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:45.396868', 'step': 17465, 'epoch': 3} {'type': 'loss', 'content': 0.08021150529384613, 'timestamp': '2025-09-10 02:57:45.398992', 'step': 17466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:45.452019', 'step': 17466, 'epoch': 3} {'type': 'loss', 'content': 0.1396796554327011, 'timestamp': '2025-09-10 02:57:45.454291', 'step': 17467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:45.508667', 'step': 17467, 'epoch': 3} {'type': 'loss', 'content': 0.1864490509033203, 'timestamp': '2025-09-10 02:57:45.514625', 'step': 17468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:45.566939', 'step': 17468, 'epoch': 3} {'type': 'loss', 'content': 0.10417328774929047, 'timestamp': '2025-09-10 02:57:45.569164', 'step': 17469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:45.622629', 'step': 17469, 'epoch': 3} {'type': 'loss', 'content': 0.06804682314395905, 'timestamp': '2025-09-10 02:57:45.624785', 'step': 17470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:45.678168', 'step': 17470, 'epoch': 3} {'type': 'loss', 'content': 0.13129198551177979, 'timestamp': '2025-09-10 02:57:45.680362', 'step': 17471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:45.733955', 'step': 17471, 'epoch': 3} {'type': 'loss', 'content': 0.043314121663570404, 'timestamp': '2025-09-10 02:57:45.740346', 'step': 17472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:45.794079', 'step': 17472, 'epoch': 3} {'type': 'loss', 'content': 0.1467439830303192, 'timestamp': '2025-09-10 02:57:45.796435', 'step': 17473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:57:45.850695', 'step': 17473, 'epoch': 3} {'type': 'loss', 'content': 0.10423293709754944, 'timestamp': '2025-09-10 02:57:45.852992', 'step': 17474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:45.906833', 'step': 17474, 'epoch': 3} {'type': 'loss', 'content': 0.07692626863718033, 'timestamp': '2025-09-10 02:57:45.909089', 'step': 17475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:45.962380', 'step': 17475, 'epoch': 3} {'type': 'loss', 'content': 0.06983692944049835, 'timestamp': '2025-09-10 02:57:45.968398', 'step': 17476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:46.022305', 'step': 17476, 'epoch': 3} {'type': 'loss', 'content': 0.05707605928182602, 'timestamp': '2025-09-10 02:57:46.024570', 'step': 17477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:46.077694', 'step': 17477, 'epoch': 3} {'type': 'loss', 'content': 0.08632812649011612, 'timestamp': '2025-09-10 02:57:46.079903', 'step': 17478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:46.133401', 'step': 17478, 'epoch': 3} {'type': 'loss', 'content': 0.05096728354692459, 'timestamp': '2025-09-10 02:57:46.135574', 'step': 17479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:46.188111', 'step': 17479, 'epoch': 3} {'type': 'loss', 'content': 0.08048362284898758, 'timestamp': '2025-09-10 02:57:46.194092', 'step': 17480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:46.247744', 'step': 17480, 'epoch': 3} {'type': 'loss', 'content': 0.05583072453737259, 'timestamp': '2025-09-10 02:57:46.249897', 'step': 17481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:46.303119', 'step': 17481, 'epoch': 3} {'type': 'loss', 'content': 0.017544474452733994, 'timestamp': '2025-09-10 02:57:46.305441', 'step': 17482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:46.358728', 'step': 17482, 'epoch': 3} {'type': 'loss', 'content': 0.09060298651456833, 'timestamp': '2025-09-10 02:57:46.360920', 'step': 17483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:46.413543', 'step': 17483, 'epoch': 3} {'type': 'loss', 'content': 0.019727671518921852, 'timestamp': '2025-09-10 02:57:46.419567', 'step': 17484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:46.472131', 'step': 17484, 'epoch': 3} {'type': 'loss', 'content': 0.1043325811624527, 'timestamp': '2025-09-10 02:57:46.474425', 'step': 17485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:46.528084', 'step': 17485, 'epoch': 3} {'type': 'loss', 'content': 0.06119561940431595, 'timestamp': '2025-09-10 02:57:46.530456', 'step': 17486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:46.583849', 'step': 17486, 'epoch': 3} {'type': 'loss', 'content': 0.08094008266925812, 'timestamp': '2025-09-10 02:57:46.586219', 'step': 17487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:46.638675', 'step': 17487, 'epoch': 3} {'type': 'loss', 'content': 0.1169106587767601, 'timestamp': '2025-09-10 02:57:46.644699', 'step': 17488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:46.697022', 'step': 17488, 'epoch': 3} {'type': 'loss', 'content': 0.058047059923410416, 'timestamp': '2025-09-10 02:57:46.699129', 'step': 17489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:46.752071', 'step': 17489, 'epoch': 3} {'type': 'loss', 'content': 0.07299548387527466, 'timestamp': '2025-09-10 02:57:46.754317', 'step': 17490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:46.808454', 'step': 17490, 'epoch': 3} {'type': 'loss', 'content': 0.13203369081020355, 'timestamp': '2025-09-10 02:57:46.810491', 'step': 17491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:46.865626', 'step': 17491, 'epoch': 3} {'type': 'loss', 'content': 0.06744692474603653, 'timestamp': '2025-09-10 02:57:46.871683', 'step': 17492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:46.925463', 'step': 17492, 'epoch': 3} {'type': 'loss', 'content': 0.07548283785581589, 'timestamp': '2025-09-10 02:57:46.927675', 'step': 17493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:46.980817', 'step': 17493, 'epoch': 3} {'type': 'loss', 'content': 0.04873323440551758, 'timestamp': '2025-09-10 02:57:46.983086', 'step': 17494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:57:47.043389', 'step': 17494, 'epoch': 3} {'type': 'loss', 'content': 0.06674481183290482, 'timestamp': '2025-09-10 02:57:47.045640', 'step': 17495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:47.098931', 'step': 17495, 'epoch': 3} {'type': 'loss', 'content': 0.040001265704631805, 'timestamp': '2025-09-10 02:57:47.104968', 'step': 17496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:47.159985', 'step': 17496, 'epoch': 3} {'type': 'loss', 'content': 0.02741444669663906, 'timestamp': '2025-09-10 02:57:47.162298', 'step': 17497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:47.214777', 'step': 17497, 'epoch': 3} {'type': 'loss', 'content': 0.139749214053154, 'timestamp': '2025-09-10 02:57:47.216920', 'step': 17498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:47.269959', 'step': 17498, 'epoch': 3} {'type': 'loss', 'content': 0.1272580474615097, 'timestamp': '2025-09-10 02:57:47.272292', 'step': 17499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:47.325030', 'step': 17499, 'epoch': 3} {'type': 'loss', 'content': 0.05079588294029236, 'timestamp': '2025-09-10 02:57:47.331205', 'step': 17500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 17500', 'timestamp': '2025-09-10 02:57:47.688926', 'step': 17500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:47.748675', 'step': 17500, 'epoch': 3} {'type': 'loss', 'content': 0.09770721197128296, 'timestamp': '2025-09-10 02:57:47.751200', 'step': 17501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:47.806244', 'step': 17501, 'epoch': 3} {'type': 'loss', 'content': 0.09814050793647766, 'timestamp': '2025-09-10 02:57:47.808823', 'step': 17502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:47.862821', 'step': 17502, 'epoch': 3} {'type': 'loss', 'content': 0.1460481733083725, 'timestamp': '2025-09-10 02:57:47.865441', 'step': 17503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:47.919738', 'step': 17503, 'epoch': 3} {'type': 'loss', 'content': 0.14165623486042023, 'timestamp': '2025-09-10 02:57:47.926087', 'step': 17504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:47.978930', 'step': 17504, 'epoch': 3} {'type': 'loss', 'content': 0.1117405891418457, 'timestamp': '2025-09-10 02:57:47.989027', 'step': 17505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:48.055447', 'step': 17505, 'epoch': 3} {'type': 'loss', 'content': 0.07907357811927795, 'timestamp': '2025-09-10 02:57:48.059028', 'step': 17506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:48.114234', 'step': 17506, 'epoch': 3} {'type': 'loss', 'content': 0.11620086431503296, 'timestamp': '2025-09-10 02:57:48.116524', 'step': 17507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:48.170526', 'step': 17507, 'epoch': 3} {'type': 'loss', 'content': 0.13193538784980774, 'timestamp': '2025-09-10 02:57:48.176440', 'step': 17508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:48.228735', 'step': 17508, 'epoch': 3} {'type': 'loss', 'content': 0.06623244285583496, 'timestamp': '2025-09-10 02:57:48.230867', 'step': 17509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:48.283911', 'step': 17509, 'epoch': 3} {'type': 'loss', 'content': 0.06502992659807205, 'timestamp': '2025-09-10 02:57:48.286141', 'step': 17510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:48.339224', 'step': 17510, 'epoch': 3} {'type': 'loss', 'content': 0.1167190670967102, 'timestamp': '2025-09-10 02:57:48.341638', 'step': 17511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:48.394561', 'step': 17511, 'epoch': 3} {'type': 'loss', 'content': 0.04464999586343765, 'timestamp': '2025-09-10 02:57:48.400457', 'step': 17512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:48.454068', 'step': 17512, 'epoch': 3} {'type': 'loss', 'content': 0.1465100646018982, 'timestamp': '2025-09-10 02:57:48.459553', 'step': 17513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:48.515623', 'step': 17513, 'epoch': 3} {'type': 'loss', 'content': 0.06716310232877731, 'timestamp': '2025-09-10 02:57:48.518302', 'step': 17514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:48.576262', 'step': 17514, 'epoch': 3} {'type': 'loss', 'content': 0.0424342155456543, 'timestamp': '2025-09-10 02:57:48.578634', 'step': 17515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:48.632420', 'step': 17515, 'epoch': 3} {'type': 'loss', 'content': 0.10984665900468826, 'timestamp': '2025-09-10 02:57:48.638710', 'step': 17516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:48.691336', 'step': 17516, 'epoch': 3} {'type': 'loss', 'content': 0.030074093490839005, 'timestamp': '2025-09-10 02:57:48.693445', 'step': 17517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:48.746408', 'step': 17517, 'epoch': 3} {'type': 'loss', 'content': 0.07579874992370605, 'timestamp': '2025-09-10 02:57:48.748727', 'step': 17518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:48.801788', 'step': 17518, 'epoch': 3} {'type': 'loss', 'content': 0.1049768254160881, 'timestamp': '2025-09-10 02:57:48.804120', 'step': 17519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:48.857387', 'step': 17519, 'epoch': 3} {'type': 'loss', 'content': 0.08048345893621445, 'timestamp': '2025-09-10 02:57:48.866789', 'step': 17520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:48.933494', 'step': 17520, 'epoch': 3} {'type': 'loss', 'content': 0.07417317479848862, 'timestamp': '2025-09-10 02:57:48.935838', 'step': 17521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:49.015231', 'step': 17521, 'epoch': 3} {'type': 'loss', 'content': 0.04874208942055702, 'timestamp': '2025-09-10 02:57:49.017584', 'step': 17522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:49.092388', 'step': 17522, 'epoch': 3} {'type': 'loss', 'content': 0.06225728988647461, 'timestamp': '2025-09-10 02:57:49.094626', 'step': 17523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:49.160286', 'step': 17523, 'epoch': 3} {'type': 'loss', 'content': 0.042532555758953094, 'timestamp': '2025-09-10 02:57:49.166382', 'step': 17524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:57:49.219997', 'step': 17524, 'epoch': 3} {'type': 'loss', 'content': 0.04646719619631767, 'timestamp': '2025-09-10 02:57:49.222299', 'step': 17525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:49.278073', 'step': 17525, 'epoch': 3} {'type': 'loss', 'content': 0.13201996684074402, 'timestamp': '2025-09-10 02:57:49.280386', 'step': 17526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:49.335557', 'step': 17526, 'epoch': 3} {'type': 'loss', 'content': 0.05266185477375984, 'timestamp': '2025-09-10 02:57:49.338022', 'step': 17527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:49.398765', 'step': 17527, 'epoch': 3} {'type': 'loss', 'content': 0.04497673362493515, 'timestamp': '2025-09-10 02:57:49.405034', 'step': 17528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:49.471067', 'step': 17528, 'epoch': 3} {'type': 'loss', 'content': 0.14861133694648743, 'timestamp': '2025-09-10 02:57:49.473224', 'step': 17529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:49.530335', 'step': 17529, 'epoch': 3} {'type': 'loss', 'content': 0.14185228943824768, 'timestamp': '2025-09-10 02:57:49.532716', 'step': 17530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:49.587121', 'step': 17530, 'epoch': 3} {'type': 'loss', 'content': 0.051306676119565964, 'timestamp': '2025-09-10 02:57:49.590690', 'step': 17531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:49.644960', 'step': 17531, 'epoch': 3} {'type': 'loss', 'content': 0.048916175961494446, 'timestamp': '2025-09-10 02:57:49.650829', 'step': 17532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:49.707312', 'step': 17532, 'epoch': 3} {'type': 'loss', 'content': 0.06716472655534744, 'timestamp': '2025-09-10 02:57:49.709320', 'step': 17533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:49.762264', 'step': 17533, 'epoch': 3} {'type': 'loss', 'content': 0.08920703083276749, 'timestamp': '2025-09-10 02:57:49.764533', 'step': 17534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:49.820483', 'step': 17534, 'epoch': 3} {'type': 'loss', 'content': 0.060023557394742966, 'timestamp': '2025-09-10 02:57:49.824972', 'step': 17535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:49.879978', 'step': 17535, 'epoch': 3} {'type': 'loss', 'content': 0.09313108772039413, 'timestamp': '2025-09-10 02:57:49.885881', 'step': 17536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:49.943394', 'step': 17536, 'epoch': 3} {'type': 'loss', 'content': 0.20950429141521454, 'timestamp': '2025-09-10 02:57:49.947329', 'step': 17537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:50.000766', 'step': 17537, 'epoch': 3} {'type': 'loss', 'content': 0.05425054952502251, 'timestamp': '2025-09-10 02:57:50.005884', 'step': 17538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:50.063261', 'step': 17538, 'epoch': 3} {'type': 'loss', 'content': 0.14087575674057007, 'timestamp': '2025-09-10 02:57:50.065706', 'step': 17539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:50.119123', 'step': 17539, 'epoch': 3} {'type': 'loss', 'content': 0.07700324803590775, 'timestamp': '2025-09-10 02:57:50.125477', 'step': 17540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:50.178276', 'step': 17540, 'epoch': 3} {'type': 'loss', 'content': 0.05248875543475151, 'timestamp': '2025-09-10 02:57:50.180445', 'step': 17541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:50.232840', 'step': 17541, 'epoch': 3} {'type': 'loss', 'content': 0.048230838030576706, 'timestamp': '2025-09-10 02:57:50.234955', 'step': 17542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:50.294257', 'step': 17542, 'epoch': 3} {'type': 'loss', 'content': 0.11043698340654373, 'timestamp': '2025-09-10 02:57:50.296476', 'step': 17543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:50.349616', 'step': 17543, 'epoch': 3} {'type': 'loss', 'content': 0.11232592165470123, 'timestamp': '2025-09-10 02:57:50.355688', 'step': 17544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:50.408205', 'step': 17544, 'epoch': 3} {'type': 'loss', 'content': 0.07089825719594955, 'timestamp': '2025-09-10 02:57:50.410622', 'step': 17545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:50.463773', 'step': 17545, 'epoch': 3} {'type': 'loss', 'content': 0.10391057282686234, 'timestamp': '2025-09-10 02:57:50.465752', 'step': 17546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:50.519462', 'step': 17546, 'epoch': 3} {'type': 'loss', 'content': 0.08627418428659439, 'timestamp': '2025-09-10 02:57:50.521807', 'step': 17547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:50.574907', 'step': 17547, 'epoch': 3} {'type': 'loss', 'content': 0.04851256310939789, 'timestamp': '2025-09-10 02:57:50.580836', 'step': 17548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:50.632986', 'step': 17548, 'epoch': 3} {'type': 'loss', 'content': 0.07409383356571198, 'timestamp': '2025-09-10 02:57:50.635107', 'step': 17549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:50.688301', 'step': 17549, 'epoch': 3} {'type': 'loss', 'content': 0.05916094779968262, 'timestamp': '2025-09-10 02:57:50.690334', 'step': 17550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:50.743890', 'step': 17550, 'epoch': 3} {'type': 'loss', 'content': 0.10843151062726974, 'timestamp': '2025-09-10 02:57:50.745946', 'step': 17551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:50.799032', 'step': 17551, 'epoch': 3} {'type': 'loss', 'content': 0.0630846619606018, 'timestamp': '2025-09-10 02:57:50.804688', 'step': 17552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:50.857895', 'step': 17552, 'epoch': 3} {'type': 'loss', 'content': 0.06881023943424225, 'timestamp': '2025-09-10 02:57:50.860411', 'step': 17553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:50.912843', 'step': 17553, 'epoch': 3} {'type': 'loss', 'content': 0.09033865481615067, 'timestamp': '2025-09-10 02:57:50.915303', 'step': 17554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:50.968417', 'step': 17554, 'epoch': 3} {'type': 'loss', 'content': 0.0731380432844162, 'timestamp': '2025-09-10 02:57:50.970396', 'step': 17555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:51.026675', 'step': 17555, 'epoch': 3} {'type': 'loss', 'content': 0.08089202642440796, 'timestamp': '2025-09-10 02:57:51.034191', 'step': 17556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:51.087866', 'step': 17556, 'epoch': 3} {'type': 'loss', 'content': 0.07025804370641708, 'timestamp': '2025-09-10 02:57:51.090193', 'step': 17557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:51.143500', 'step': 17557, 'epoch': 3} {'type': 'loss', 'content': 0.03695837780833244, 'timestamp': '2025-09-10 02:57:51.145677', 'step': 17558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:51.198988', 'step': 17558, 'epoch': 3} {'type': 'loss', 'content': 0.11275999248027802, 'timestamp': '2025-09-10 02:57:51.201137', 'step': 17559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:51.255233', 'step': 17559, 'epoch': 3} {'type': 'loss', 'content': 0.11517099291086197, 'timestamp': '2025-09-10 02:57:51.261010', 'step': 17560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:51.316422', 'step': 17560, 'epoch': 3} {'type': 'loss', 'content': 0.13782525062561035, 'timestamp': '2025-09-10 02:57:51.318451', 'step': 17561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:51.371006', 'step': 17561, 'epoch': 3} {'type': 'loss', 'content': 0.10908541828393936, 'timestamp': '2025-09-10 02:57:51.373213', 'step': 17562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:51.427370', 'step': 17562, 'epoch': 3} {'type': 'loss', 'content': 0.11193667352199554, 'timestamp': '2025-09-10 02:57:51.429590', 'step': 17563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:51.482604', 'step': 17563, 'epoch': 3} {'type': 'loss', 'content': 0.04724092036485672, 'timestamp': '2025-09-10 02:57:51.488346', 'step': 17564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:51.540942', 'step': 17564, 'epoch': 3} {'type': 'loss', 'content': 0.094730906188488, 'timestamp': '2025-09-10 02:57:51.543021', 'step': 17565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:51.595988', 'step': 17565, 'epoch': 3} {'type': 'loss', 'content': 0.07563147693872452, 'timestamp': '2025-09-10 02:57:51.598197', 'step': 17566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:51.652137', 'step': 17566, 'epoch': 3} {'type': 'loss', 'content': 0.07910212129354477, 'timestamp': '2025-09-10 02:57:51.654527', 'step': 17567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:51.707782', 'step': 17567, 'epoch': 3} {'type': 'loss', 'content': 0.04871758818626404, 'timestamp': '2025-09-10 02:57:51.713804', 'step': 17568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:51.766798', 'step': 17568, 'epoch': 3} {'type': 'loss', 'content': 0.052809812128543854, 'timestamp': '2025-09-10 02:57:51.769016', 'step': 17569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:51.821801', 'step': 17569, 'epoch': 3} {'type': 'loss', 'content': 0.11181852966547012, 'timestamp': '2025-09-10 02:57:51.824177', 'step': 17570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:51.876747', 'step': 17570, 'epoch': 3} {'type': 'loss', 'content': 0.09903864562511444, 'timestamp': '2025-09-10 02:57:51.878874', 'step': 17571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:51.931402', 'step': 17571, 'epoch': 3} {'type': 'loss', 'content': 0.03146945312619209, 'timestamp': '2025-09-10 02:57:51.937230', 'step': 17572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:51.989749', 'step': 17572, 'epoch': 3} {'type': 'loss', 'content': 0.0654165968298912, 'timestamp': '2025-09-10 02:57:51.991885', 'step': 17573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:52.044642', 'step': 17573, 'epoch': 3} {'type': 'loss', 'content': 0.013661468401551247, 'timestamp': '2025-09-10 02:57:52.046889', 'step': 17574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:52.100166', 'step': 17574, 'epoch': 3} {'type': 'loss', 'content': 0.06595666706562042, 'timestamp': '2025-09-10 02:57:52.102340', 'step': 17575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:52.155679', 'step': 17575, 'epoch': 3} {'type': 'loss', 'content': 0.07125594466924667, 'timestamp': '2025-09-10 02:57:52.161510', 'step': 17576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:57:52.214980', 'step': 17576, 'epoch': 3} {'type': 'loss', 'content': 0.06970137357711792, 'timestamp': '2025-09-10 02:57:52.217002', 'step': 17577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:52.271368', 'step': 17577, 'epoch': 3} {'type': 'loss', 'content': 0.03769238665699959, 'timestamp': '2025-09-10 02:57:52.273593', 'step': 17578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:52.328206', 'step': 17578, 'epoch': 3} {'type': 'loss', 'content': 0.03346872702240944, 'timestamp': '2025-09-10 02:57:52.330297', 'step': 17579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:52.384063', 'step': 17579, 'epoch': 3} {'type': 'loss', 'content': 0.1498810350894928, 'timestamp': '2025-09-10 02:57:52.390008', 'step': 17580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:52.443200', 'step': 17580, 'epoch': 3} {'type': 'loss', 'content': 0.14621184766292572, 'timestamp': '2025-09-10 02:57:52.445424', 'step': 17581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:52.499696', 'step': 17581, 'epoch': 3} {'type': 'loss', 'content': 0.06778493523597717, 'timestamp': '2025-09-10 02:57:52.501735', 'step': 17582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:52.555101', 'step': 17582, 'epoch': 3} {'type': 'loss', 'content': 0.07643640786409378, 'timestamp': '2025-09-10 02:57:52.558671', 'step': 17583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:52.613169', 'step': 17583, 'epoch': 3} {'type': 'loss', 'content': 0.12034948915243149, 'timestamp': '2025-09-10 02:57:52.619116', 'step': 17584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:52.671737', 'step': 17584, 'epoch': 3} {'type': 'loss', 'content': 0.10338244587182999, 'timestamp': '2025-09-10 02:57:52.673956', 'step': 17585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:52.727685', 'step': 17585, 'epoch': 3} {'type': 'loss', 'content': 0.0635281503200531, 'timestamp': '2025-09-10 02:57:52.729842', 'step': 17586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:52.784545', 'step': 17586, 'epoch': 3} {'type': 'loss', 'content': 0.149680495262146, 'timestamp': '2025-09-10 02:57:52.786937', 'step': 17587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:52.839964', 'step': 17587, 'epoch': 3} {'type': 'loss', 'content': 0.07387198507785797, 'timestamp': '2025-09-10 02:57:52.846177', 'step': 17588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:52.899254', 'step': 17588, 'epoch': 3} {'type': 'loss', 'content': 0.09010442346334457, 'timestamp': '2025-09-10 02:57:52.901508', 'step': 17589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:52.954446', 'step': 17589, 'epoch': 3} {'type': 'loss', 'content': 0.08837498724460602, 'timestamp': '2025-09-10 02:57:52.956652', 'step': 17590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:53.010091', 'step': 17590, 'epoch': 3} {'type': 'loss', 'content': 0.08344115316867828, 'timestamp': '2025-09-10 02:57:53.012081', 'step': 17591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:53.065155', 'step': 17591, 'epoch': 3} {'type': 'loss', 'content': 0.058620572090148926, 'timestamp': '2025-09-10 02:57:53.070963', 'step': 17592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:53.123474', 'step': 17592, 'epoch': 3} {'type': 'loss', 'content': 0.07120184600353241, 'timestamp': '2025-09-10 02:57:53.125542', 'step': 17593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:53.178279', 'step': 17593, 'epoch': 3} {'type': 'loss', 'content': 0.06962711364030838, 'timestamp': '2025-09-10 02:57:53.180592', 'step': 17594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:53.233942', 'step': 17594, 'epoch': 3} {'type': 'loss', 'content': 0.07331939041614532, 'timestamp': '2025-09-10 02:57:53.236432', 'step': 17595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:57:53.290138', 'step': 17595, 'epoch': 3} {'type': 'loss', 'content': 0.06035153940320015, 'timestamp': '2025-09-10 02:57:53.296152', 'step': 17596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:53.348426', 'step': 17596, 'epoch': 3} {'type': 'loss', 'content': 0.07771528512239456, 'timestamp': '2025-09-10 02:57:53.352179', 'step': 17597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:53.405835', 'step': 17597, 'epoch': 3} {'type': 'loss', 'content': 0.08096300810575485, 'timestamp': '2025-09-10 02:57:53.407966', 'step': 17598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:53.460612', 'step': 17598, 'epoch': 3} {'type': 'loss', 'content': 0.03458089381456375, 'timestamp': '2025-09-10 02:57:53.462597', 'step': 17599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:53.515407', 'step': 17599, 'epoch': 3} {'type': 'loss', 'content': 0.11330746114253998, 'timestamp': '2025-09-10 02:57:53.521082', 'step': 17600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:53.573572', 'step': 17600, 'epoch': 3} {'type': 'loss', 'content': 0.04269050434231758, 'timestamp': '2025-09-10 02:57:53.575674', 'step': 17601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:53.628812', 'step': 17601, 'epoch': 3} {'type': 'loss', 'content': 0.09298726916313171, 'timestamp': '2025-09-10 02:57:53.631196', 'step': 17602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:53.684319', 'step': 17602, 'epoch': 3} {'type': 'loss', 'content': 0.09274131804704666, 'timestamp': '2025-09-10 02:57:53.687530', 'step': 17603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:57:53.741316', 'step': 17603, 'epoch': 3} {'type': 'loss', 'content': 0.07600606232881546, 'timestamp': '2025-09-10 02:57:53.747059', 'step': 17604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:53.799778', 'step': 17604, 'epoch': 3} {'type': 'loss', 'content': 0.021452831104397774, 'timestamp': '2025-09-10 02:57:53.801833', 'step': 17605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:53.855200', 'step': 17605, 'epoch': 3} {'type': 'loss', 'content': 0.06280602514743805, 'timestamp': '2025-09-10 02:57:53.857401', 'step': 17606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:53.910415', 'step': 17606, 'epoch': 3} {'type': 'loss', 'content': 0.048734426498413086, 'timestamp': '2025-09-10 02:57:53.912481', 'step': 17607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:53.965608', 'step': 17607, 'epoch': 3} {'type': 'loss', 'content': 0.08363228291273117, 'timestamp': '2025-09-10 02:57:53.971370', 'step': 17608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:54.024187', 'step': 17608, 'epoch': 3} {'type': 'loss', 'content': 0.07152706384658813, 'timestamp': '2025-09-10 02:57:54.026305', 'step': 17609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:54.079456', 'step': 17609, 'epoch': 3} {'type': 'loss', 'content': 0.10591967403888702, 'timestamp': '2025-09-10 02:57:54.081483', 'step': 17610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:54.134305', 'step': 17610, 'epoch': 3} {'type': 'loss', 'content': 0.12358637154102325, 'timestamp': '2025-09-10 02:57:54.136293', 'step': 17611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:54.189621', 'step': 17611, 'epoch': 3} {'type': 'loss', 'content': 0.16350293159484863, 'timestamp': '2025-09-10 02:57:54.195292', 'step': 17612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:54.249129', 'step': 17612, 'epoch': 3} {'type': 'loss', 'content': 0.028105158358812332, 'timestamp': '2025-09-10 02:57:54.251129', 'step': 17613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:54.304070', 'step': 17613, 'epoch': 3} {'type': 'loss', 'content': 0.09499761462211609, 'timestamp': '2025-09-10 02:57:54.306080', 'step': 17614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:54.359880', 'step': 17614, 'epoch': 3} {'type': 'loss', 'content': 0.13040651381015778, 'timestamp': '2025-09-10 02:57:54.362332', 'step': 17615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:54.416152', 'step': 17615, 'epoch': 3} {'type': 'loss', 'content': 0.081539586186409, 'timestamp': '2025-09-10 02:57:54.422432', 'step': 17616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:54.477497', 'step': 17616, 'epoch': 3} {'type': 'loss', 'content': 0.07240565121173859, 'timestamp': '2025-09-10 02:57:54.479578', 'step': 17617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:54.532733', 'step': 17617, 'epoch': 3} {'type': 'loss', 'content': 0.10745297372341156, 'timestamp': '2025-09-10 02:57:54.534715', 'step': 17618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:57:54.588713', 'step': 17618, 'epoch': 3} {'type': 'loss', 'content': 0.13659387826919556, 'timestamp': '2025-09-10 02:57:54.590817', 'step': 17619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:54.643756', 'step': 17619, 'epoch': 3} {'type': 'loss', 'content': 0.08164933323860168, 'timestamp': '2025-09-10 02:57:54.649781', 'step': 17620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:54.702296', 'step': 17620, 'epoch': 3} {'type': 'loss', 'content': 0.03182278573513031, 'timestamp': '2025-09-10 02:57:54.704294', 'step': 17621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:54.758407', 'step': 17621, 'epoch': 3} {'type': 'loss', 'content': 0.10515227168798447, 'timestamp': '2025-09-10 02:57:54.760598', 'step': 17622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:54.813521', 'step': 17622, 'epoch': 3} {'type': 'loss', 'content': 0.14904750883579254, 'timestamp': '2025-09-10 02:57:54.815704', 'step': 17623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:54.868895', 'step': 17623, 'epoch': 3} {'type': 'loss', 'content': 0.10942395776510239, 'timestamp': '2025-09-10 02:57:54.874811', 'step': 17624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:54.927786', 'step': 17624, 'epoch': 3} {'type': 'loss', 'content': 0.1383102387189865, 'timestamp': '2025-09-10 02:57:54.929991', 'step': 17625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:54.983247', 'step': 17625, 'epoch': 3} {'type': 'loss', 'content': 0.10500596463680267, 'timestamp': '2025-09-10 02:57:54.985307', 'step': 17626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:55.046850', 'step': 17626, 'epoch': 3} {'type': 'loss', 'content': 0.13101863861083984, 'timestamp': '2025-09-10 02:57:55.048984', 'step': 17627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:55.102270', 'step': 17627, 'epoch': 3} {'type': 'loss', 'content': 0.10241718590259552, 'timestamp': '2025-09-10 02:57:55.108358', 'step': 17628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:55.162057', 'step': 17628, 'epoch': 3} {'type': 'loss', 'content': 0.09284351021051407, 'timestamp': '2025-09-10 02:57:55.164341', 'step': 17629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:55.217339', 'step': 17629, 'epoch': 3} {'type': 'loss', 'content': 0.1676226407289505, 'timestamp': '2025-09-10 02:57:55.219696', 'step': 17630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:55.272903', 'step': 17630, 'epoch': 3} {'type': 'loss', 'content': 0.1199495866894722, 'timestamp': '2025-09-10 02:57:55.275371', 'step': 17631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:55.328028', 'step': 17631, 'epoch': 3} {'type': 'loss', 'content': 0.13011746108531952, 'timestamp': '2025-09-10 02:57:55.333721', 'step': 17632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:55.387085', 'step': 17632, 'epoch': 3} {'type': 'loss', 'content': 0.1562417447566986, 'timestamp': '2025-09-10 02:57:55.389314', 'step': 17633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:55.442056', 'step': 17633, 'epoch': 3} {'type': 'loss', 'content': 0.06442061066627502, 'timestamp': '2025-09-10 02:57:55.444127', 'step': 17634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:55.497216', 'step': 17634, 'epoch': 3} {'type': 'loss', 'content': 0.12976600229740143, 'timestamp': '2025-09-10 02:57:55.499335', 'step': 17635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:55.552383', 'step': 17635, 'epoch': 3} {'type': 'loss', 'content': 0.08374800533056259, 'timestamp': '2025-09-10 02:57:55.558258', 'step': 17636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:55.611192', 'step': 17636, 'epoch': 3} {'type': 'loss', 'content': 0.10758297145366669, 'timestamp': '2025-09-10 02:57:55.613414', 'step': 17637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:55.667897', 'step': 17637, 'epoch': 3} {'type': 'loss', 'content': 0.11417250335216522, 'timestamp': '2025-09-10 02:57:55.669888', 'step': 17638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:55.722854', 'step': 17638, 'epoch': 3} {'type': 'loss', 'content': 0.055188585072755814, 'timestamp': '2025-09-10 02:57:55.724913', 'step': 17639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:55.778093', 'step': 17639, 'epoch': 3} {'type': 'loss', 'content': 0.20477284491062164, 'timestamp': '2025-09-10 02:57:55.783769', 'step': 17640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:57:55.837348', 'step': 17640, 'epoch': 3} {'type': 'loss', 'content': 0.1730702668428421, 'timestamp': '2025-09-10 02:57:55.839533', 'step': 17641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:55.892906', 'step': 17641, 'epoch': 3} {'type': 'loss', 'content': 0.1335894763469696, 'timestamp': '2025-09-10 02:57:55.895005', 'step': 17642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:55.948514', 'step': 17642, 'epoch': 3} {'type': 'loss', 'content': 0.12013322114944458, 'timestamp': '2025-09-10 02:57:55.950586', 'step': 17643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:56.004179', 'step': 17643, 'epoch': 3} {'type': 'loss', 'content': 0.08391143381595612, 'timestamp': '2025-09-10 02:57:56.010074', 'step': 17644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:56.062308', 'step': 17644, 'epoch': 3} {'type': 'loss', 'content': 0.10094792395830154, 'timestamp': '2025-09-10 02:57:56.064681', 'step': 17645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:56.118884', 'step': 17645, 'epoch': 3} {'type': 'loss', 'content': 0.07302899658679962, 'timestamp': '2025-09-10 02:57:56.121173', 'step': 17646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:56.177341', 'step': 17646, 'epoch': 3} {'type': 'loss', 'content': 0.10863835364580154, 'timestamp': '2025-09-10 02:57:56.179575', 'step': 17647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:56.232946', 'step': 17647, 'epoch': 3} {'type': 'loss', 'content': 0.09098400920629501, 'timestamp': '2025-09-10 02:57:56.238847', 'step': 17648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:56.292446', 'step': 17648, 'epoch': 3} {'type': 'loss', 'content': 0.08878911286592484, 'timestamp': '2025-09-10 02:57:56.294661', 'step': 17649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:56.347700', 'step': 17649, 'epoch': 3} {'type': 'loss', 'content': 0.0680110901594162, 'timestamp': '2025-09-10 02:57:56.350051', 'step': 17650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:56.404192', 'step': 17650, 'epoch': 3} {'type': 'loss', 'content': 0.21657641232013702, 'timestamp': '2025-09-10 02:57:56.406495', 'step': 17651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:56.459851', 'step': 17651, 'epoch': 3} {'type': 'loss', 'content': 0.11008990556001663, 'timestamp': '2025-09-10 02:57:56.465865', 'step': 17652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:56.518894', 'step': 17652, 'epoch': 3} {'type': 'loss', 'content': 0.10095351934432983, 'timestamp': '2025-09-10 02:57:56.521142', 'step': 17653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:56.574060', 'step': 17653, 'epoch': 3} {'type': 'loss', 'content': 0.11516259610652924, 'timestamp': '2025-09-10 02:57:56.576301', 'step': 17654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:56.629812', 'step': 17654, 'epoch': 3} {'type': 'loss', 'content': 0.16641849279403687, 'timestamp': '2025-09-10 02:57:56.632046', 'step': 17655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:56.687443', 'step': 17655, 'epoch': 3} {'type': 'loss', 'content': 0.12089186161756516, 'timestamp': '2025-09-10 02:57:56.693390', 'step': 17656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:57:56.746155', 'step': 17656, 'epoch': 3} {'type': 'loss', 'content': 0.06760022044181824, 'timestamp': '2025-09-10 02:57:56.748335', 'step': 17657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:56.801327', 'step': 17657, 'epoch': 3} {'type': 'loss', 'content': 0.12268470972776413, 'timestamp': '2025-09-10 02:57:56.803739', 'step': 17658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:56.857590', 'step': 17658, 'epoch': 3} {'type': 'loss', 'content': 0.06784467399120331, 'timestamp': '2025-09-10 02:57:56.860140', 'step': 17659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:56.912996', 'step': 17659, 'epoch': 3} {'type': 'loss', 'content': 0.1534872204065323, 'timestamp': '2025-09-10 02:57:56.918883', 'step': 17660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:56.972474', 'step': 17660, 'epoch': 3} {'type': 'loss', 'content': 0.123890720307827, 'timestamp': '2025-09-10 02:57:56.974683', 'step': 17661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:57.027941', 'step': 17661, 'epoch': 3} {'type': 'loss', 'content': 0.049729228019714355, 'timestamp': '2025-09-10 02:57:57.030609', 'step': 17662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:57.086395', 'step': 17662, 'epoch': 3} {'type': 'loss', 'content': 0.0875600278377533, 'timestamp': '2025-09-10 02:57:57.088965', 'step': 17663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:57.142685', 'step': 17663, 'epoch': 3} {'type': 'loss', 'content': 0.06969238817691803, 'timestamp': '2025-09-10 02:57:57.148811', 'step': 17664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:57.201248', 'step': 17664, 'epoch': 3} {'type': 'loss', 'content': 0.12740111351013184, 'timestamp': '2025-09-10 02:57:57.203419', 'step': 17665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:57.256745', 'step': 17665, 'epoch': 3} {'type': 'loss', 'content': 0.08438713848590851, 'timestamp': '2025-09-10 02:57:57.258930', 'step': 17666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:57.312519', 'step': 17666, 'epoch': 3} {'type': 'loss', 'content': 0.058214422315359116, 'timestamp': '2025-09-10 02:57:57.314717', 'step': 17667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:57.368329', 'step': 17667, 'epoch': 3} {'type': 'loss', 'content': 0.1045377179980278, 'timestamp': '2025-09-10 02:57:57.374205', 'step': 17668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:57:57.427110', 'step': 17668, 'epoch': 3} {'type': 'loss', 'content': 0.08922681957483292, 'timestamp': '2025-09-10 02:57:57.429295', 'step': 17669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:57.482597', 'step': 17669, 'epoch': 3} {'type': 'loss', 'content': 0.08122078329324722, 'timestamp': '2025-09-10 02:57:57.484888', 'step': 17670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:57.539519', 'step': 17670, 'epoch': 3} {'type': 'loss', 'content': 0.061655640602111816, 'timestamp': '2025-09-10 02:57:57.541847', 'step': 17671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:57.597923', 'step': 17671, 'epoch': 3} {'type': 'loss', 'content': 0.04889581725001335, 'timestamp': '2025-09-10 02:57:57.604021', 'step': 17672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:57.656724', 'step': 17672, 'epoch': 3} {'type': 'loss', 'content': 0.05490368977189064, 'timestamp': '2025-09-10 02:57:57.659114', 'step': 17673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:57.712189', 'step': 17673, 'epoch': 3} {'type': 'loss', 'content': 0.17330840229988098, 'timestamp': '2025-09-10 02:57:57.714658', 'step': 17674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:57.768795', 'step': 17674, 'epoch': 3} {'type': 'loss', 'content': 0.15452103316783905, 'timestamp': '2025-09-10 02:57:57.771057', 'step': 17675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:57.824072', 'step': 17675, 'epoch': 3} {'type': 'loss', 'content': 0.0746692344546318, 'timestamp': '2025-09-10 02:57:57.829914', 'step': 17676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:57.881704', 'step': 17676, 'epoch': 3} {'type': 'loss', 'content': 0.08943691849708557, 'timestamp': '2025-09-10 02:57:57.884034', 'step': 17677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:57.936969', 'step': 17677, 'epoch': 3} {'type': 'loss', 'content': 0.11417993158102036, 'timestamp': '2025-09-10 02:57:57.939117', 'step': 17678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:57.992477', 'step': 17678, 'epoch': 3} {'type': 'loss', 'content': 0.07944244146347046, 'timestamp': '2025-09-10 02:57:57.994612', 'step': 17679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:58.049330', 'step': 17679, 'epoch': 3} {'type': 'loss', 'content': 0.0442616231739521, 'timestamp': '2025-09-10 02:57:58.055082', 'step': 17680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:58.107621', 'step': 17680, 'epoch': 3} {'type': 'loss', 'content': 0.14652292430400848, 'timestamp': '2025-09-10 02:57:58.109851', 'step': 17681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:58.163306', 'step': 17681, 'epoch': 3} {'type': 'loss', 'content': 0.07362972944974899, 'timestamp': '2025-09-10 02:57:58.165564', 'step': 17682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:57:58.219357', 'step': 17682, 'epoch': 3} {'type': 'loss', 'content': 0.06743369251489639, 'timestamp': '2025-09-10 02:57:58.221542', 'step': 17683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:58.274982', 'step': 17683, 'epoch': 3} {'type': 'loss', 'content': 0.12751376628875732, 'timestamp': '2025-09-10 02:57:58.280852', 'step': 17684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:58.333646', 'step': 17684, 'epoch': 3} {'type': 'loss', 'content': 0.11941584199666977, 'timestamp': '2025-09-10 02:57:58.335938', 'step': 17685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:57:58.389078', 'step': 17685, 'epoch': 3} {'type': 'loss', 'content': 0.13832999765872955, 'timestamp': '2025-09-10 02:57:58.391451', 'step': 17686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:57:58.445384', 'step': 17686, 'epoch': 3} {'type': 'loss', 'content': 0.09960195422172546, 'timestamp': '2025-09-10 02:57:58.447698', 'step': 17687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:58.501310', 'step': 17687, 'epoch': 3} {'type': 'loss', 'content': 0.04727143421769142, 'timestamp': '2025-09-10 02:57:58.507274', 'step': 17688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:58.560083', 'step': 17688, 'epoch': 3} {'type': 'loss', 'content': 0.07404505461454391, 'timestamp': '2025-09-10 02:57:58.562323', 'step': 17689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:58.619509', 'step': 17689, 'epoch': 3} {'type': 'loss', 'content': 0.15253721177577972, 'timestamp': '2025-09-10 02:57:58.621740', 'step': 17690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:58.676359', 'step': 17690, 'epoch': 3} {'type': 'loss', 'content': 0.06439431011676788, 'timestamp': '2025-09-10 02:57:58.678854', 'step': 17691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:58.731970', 'step': 17691, 'epoch': 3} {'type': 'loss', 'content': 0.18967512249946594, 'timestamp': '2025-09-10 02:57:58.738145', 'step': 17692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:57:58.790831', 'step': 17692, 'epoch': 3} {'type': 'loss', 'content': 0.05879718437790871, 'timestamp': '2025-09-10 02:57:58.793113', 'step': 17693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:58.847258', 'step': 17693, 'epoch': 3} {'type': 'loss', 'content': 0.14056697487831116, 'timestamp': '2025-09-10 02:57:58.849565', 'step': 17694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:57:58.903776', 'step': 17694, 'epoch': 3} {'type': 'loss', 'content': 0.13249292969703674, 'timestamp': '2025-09-10 02:57:58.906302', 'step': 17695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:58.960169', 'step': 17695, 'epoch': 3} {'type': 'loss', 'content': 0.07324577122926712, 'timestamp': '2025-09-10 02:57:58.967897', 'step': 17696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:57:59.022082', 'step': 17696, 'epoch': 3} {'type': 'loss', 'content': 0.11034169793128967, 'timestamp': '2025-09-10 02:57:59.024323', 'step': 17697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:59.077907', 'step': 17697, 'epoch': 3} {'type': 'loss', 'content': 0.10089235007762909, 'timestamp': '2025-09-10 02:57:59.080116', 'step': 17698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:59.134580', 'step': 17698, 'epoch': 3} {'type': 'loss', 'content': 0.06515322625637054, 'timestamp': '2025-09-10 02:57:59.136843', 'step': 17699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:59.191619', 'step': 17699, 'epoch': 3} {'type': 'loss', 'content': 0.14719511568546295, 'timestamp': '2025-09-10 02:57:59.197807', 'step': 17700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:59.251637', 'step': 17700, 'epoch': 3} {'type': 'loss', 'content': 0.08256029337644577, 'timestamp': '2025-09-10 02:57:59.253993', 'step': 17701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:59.307609', 'step': 17701, 'epoch': 3} {'type': 'loss', 'content': 0.11732718348503113, 'timestamp': '2025-09-10 02:57:59.310073', 'step': 17702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:59.363678', 'step': 17702, 'epoch': 3} {'type': 'loss', 'content': 0.11782301962375641, 'timestamp': '2025-09-10 02:57:59.365927', 'step': 17703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:57:59.421103', 'step': 17703, 'epoch': 3} {'type': 'loss', 'content': 0.11338158696889877, 'timestamp': '2025-09-10 02:57:59.427323', 'step': 17704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:59.480414', 'step': 17704, 'epoch': 3} {'type': 'loss', 'content': 0.0397505909204483, 'timestamp': '2025-09-10 02:57:59.482601', 'step': 17705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:59.535945', 'step': 17705, 'epoch': 3} {'type': 'loss', 'content': 0.17993222177028656, 'timestamp': '2025-09-10 02:57:59.538185', 'step': 17706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:59.592160', 'step': 17706, 'epoch': 3} {'type': 'loss', 'content': 0.13571560382843018, 'timestamp': '2025-09-10 02:57:59.594425', 'step': 17707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:57:59.648191', 'step': 17707, 'epoch': 3} {'type': 'loss', 'content': 0.08495673537254333, 'timestamp': '2025-09-10 02:57:59.654466', 'step': 17708, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:58:12.498329', 'step': 17708, 'epoch': 3} {'type': 'pplx', 'content': 13674.021166203387, 'timestamp': '2025-09-10 02:58:12.503485', 'step': 17708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:12.562655', 'step': 17708, 'epoch': 3} {'type': 'loss', 'content': 0.0506661981344223, 'timestamp': '2025-09-10 02:58:12.566430', 'step': 17709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:12.627214', 'step': 17709, 'epoch': 3} {'type': 'loss', 'content': 0.12312997132539749, 'timestamp': '2025-09-10 02:58:12.629480', 'step': 17710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:12.686278', 'step': 17710, 'epoch': 3} {'type': 'loss', 'content': 0.10788667947053909, 'timestamp': '2025-09-10 02:58:12.688582', 'step': 17711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:12.743556', 'step': 17711, 'epoch': 3} {'type': 'loss', 'content': 0.2587330937385559, 'timestamp': '2025-09-10 02:58:12.750114', 'step': 17712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:12.803809', 'step': 17712, 'epoch': 3} {'type': 'loss', 'content': 0.07937314361333847, 'timestamp': '2025-09-10 02:58:12.805891', 'step': 17713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:12.862713', 'step': 17713, 'epoch': 3} {'type': 'loss', 'content': 0.1318894773721695, 'timestamp': '2025-09-10 02:58:12.864765', 'step': 17714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:12.919428', 'step': 17714, 'epoch': 3} {'type': 'loss', 'content': 0.07916882634162903, 'timestamp': '2025-09-10 02:58:12.921549', 'step': 17715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:12.976118', 'step': 17715, 'epoch': 3} {'type': 'loss', 'content': 0.0891246497631073, 'timestamp': '2025-09-10 02:58:12.982557', 'step': 17716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:13.036649', 'step': 17716, 'epoch': 3} {'type': 'loss', 'content': 0.07344069331884384, 'timestamp': '2025-09-10 02:58:13.038976', 'step': 17717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:58:13.104026', 'step': 17717, 'epoch': 3} {'type': 'loss', 'content': 0.09933114796876907, 'timestamp': '2025-09-10 02:58:13.106064', 'step': 17718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:13.161666', 'step': 17718, 'epoch': 3} {'type': 'loss', 'content': 0.028076577931642532, 'timestamp': '2025-09-10 02:58:13.163793', 'step': 17719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:13.218949', 'step': 17719, 'epoch': 3} {'type': 'loss', 'content': 0.09083899855613708, 'timestamp': '2025-09-10 02:58:13.225224', 'step': 17720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:58:13.280512', 'step': 17720, 'epoch': 3} {'type': 'loss', 'content': 0.04619291424751282, 'timestamp': '2025-09-10 02:58:13.282947', 'step': 17721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:13.339152', 'step': 17721, 'epoch': 3} {'type': 'loss', 'content': 0.09310521185398102, 'timestamp': '2025-09-10 02:58:13.341213', 'step': 17722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:58:13.397498', 'step': 17722, 'epoch': 3} {'type': 'loss', 'content': 0.040871478617191315, 'timestamp': '2025-09-10 02:58:13.399476', 'step': 17723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:13.454298', 'step': 17723, 'epoch': 3} {'type': 'loss', 'content': 0.13447138667106628, 'timestamp': '2025-09-10 02:58:13.460816', 'step': 17724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:13.515374', 'step': 17724, 'epoch': 3} {'type': 'loss', 'content': 0.10242798179388046, 'timestamp': '2025-09-10 02:58:13.517926', 'step': 17725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:13.573520', 'step': 17725, 'epoch': 3} {'type': 'loss', 'content': 0.07779887318611145, 'timestamp': '2025-09-10 02:58:13.575589', 'step': 17726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:13.631903', 'step': 17726, 'epoch': 3} {'type': 'loss', 'content': 0.14774589240550995, 'timestamp': '2025-09-10 02:58:13.634340', 'step': 17727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:13.693250', 'step': 17727, 'epoch': 3} {'type': 'loss', 'content': 0.11624391376972198, 'timestamp': '2025-09-10 02:58:13.699802', 'step': 17728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:13.754444', 'step': 17728, 'epoch': 3} {'type': 'loss', 'content': 0.12355535477399826, 'timestamp': '2025-09-10 02:58:13.756707', 'step': 17729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:13.813488', 'step': 17729, 'epoch': 3} {'type': 'loss', 'content': 0.12181919813156128, 'timestamp': '2025-09-10 02:58:13.816051', 'step': 17730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:13.873143', 'step': 17730, 'epoch': 3} {'type': 'loss', 'content': 0.0564962700009346, 'timestamp': '2025-09-10 02:58:13.875550', 'step': 17731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:13.932439', 'step': 17731, 'epoch': 3} {'type': 'loss', 'content': 0.015413972549140453, 'timestamp': '2025-09-10 02:58:13.939216', 'step': 17732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:58:13.995937', 'step': 17732, 'epoch': 3} {'type': 'loss', 'content': 0.08345972746610641, 'timestamp': '2025-09-10 02:58:13.998119', 'step': 17733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:14.056477', 'step': 17733, 'epoch': 3} {'type': 'loss', 'content': 0.07084060460329056, 'timestamp': '2025-09-10 02:58:14.058683', 'step': 17734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:14.114724', 'step': 17734, 'epoch': 3} {'type': 'loss', 'content': 0.10349557548761368, 'timestamp': '2025-09-10 02:58:14.116943', 'step': 17735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:14.173907', 'step': 17735, 'epoch': 3} {'type': 'loss', 'content': 0.11107853055000305, 'timestamp': '2025-09-10 02:58:14.180477', 'step': 17736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:14.241769', 'step': 17736, 'epoch': 3} {'type': 'loss', 'content': 0.09585834294557571, 'timestamp': '2025-09-10 02:58:14.244017', 'step': 17737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:14.299316', 'step': 17737, 'epoch': 3} {'type': 'loss', 'content': 0.06264672428369522, 'timestamp': '2025-09-10 02:58:14.301525', 'step': 17738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:14.357412', 'step': 17738, 'epoch': 3} {'type': 'loss', 'content': 0.2287757247686386, 'timestamp': '2025-09-10 02:58:14.359858', 'step': 17739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:14.414737', 'step': 17739, 'epoch': 3} {'type': 'loss', 'content': 0.14843659102916718, 'timestamp': '2025-09-10 02:58:14.421416', 'step': 17740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:14.475707', 'step': 17740, 'epoch': 3} {'type': 'loss', 'content': 0.1101195365190506, 'timestamp': '2025-09-10 02:58:14.478003', 'step': 17741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:14.533299', 'step': 17741, 'epoch': 3} {'type': 'loss', 'content': 0.1468721330165863, 'timestamp': '2025-09-10 02:58:14.535444', 'step': 17742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:14.590985', 'step': 17742, 'epoch': 3} {'type': 'loss', 'content': 0.055216189473867416, 'timestamp': '2025-09-10 02:58:14.593248', 'step': 17743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:14.649772', 'step': 17743, 'epoch': 3} {'type': 'loss', 'content': 0.06978388875722885, 'timestamp': '2025-09-10 02:58:14.655647', 'step': 17744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:14.711021', 'step': 17744, 'epoch': 3} {'type': 'loss', 'content': 0.10945766419172287, 'timestamp': '2025-09-10 02:58:14.713183', 'step': 17745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:14.769762', 'step': 17745, 'epoch': 3} {'type': 'loss', 'content': 0.11495308578014374, 'timestamp': '2025-09-10 02:58:14.772116', 'step': 17746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:14.827120', 'step': 17746, 'epoch': 3} {'type': 'loss', 'content': 0.0835043340921402, 'timestamp': '2025-09-10 02:58:14.829415', 'step': 17747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:14.884652', 'step': 17747, 'epoch': 3} {'type': 'loss', 'content': 0.03799543157219887, 'timestamp': '2025-09-10 02:58:14.891159', 'step': 17748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:14.946401', 'step': 17748, 'epoch': 3} {'type': 'loss', 'content': 0.032380931079387665, 'timestamp': '2025-09-10 02:58:14.948577', 'step': 17749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:15.003590', 'step': 17749, 'epoch': 3} {'type': 'loss', 'content': 0.14788468182086945, 'timestamp': '2025-09-10 02:58:15.006063', 'step': 17750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:15.060433', 'step': 17750, 'epoch': 3} {'type': 'loss', 'content': 0.1179417222738266, 'timestamp': '2025-09-10 02:58:15.062763', 'step': 17751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:15.118328', 'step': 17751, 'epoch': 3} {'type': 'loss', 'content': 0.05780645087361336, 'timestamp': '2025-09-10 02:58:15.124559', 'step': 17752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:15.178807', 'step': 17752, 'epoch': 3} {'type': 'loss', 'content': 0.05250304192304611, 'timestamp': '2025-09-10 02:58:15.181124', 'step': 17753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:15.238368', 'step': 17753, 'epoch': 3} {'type': 'loss', 'content': 0.08672800660133362, 'timestamp': '2025-09-10 02:58:15.240814', 'step': 17754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:15.298375', 'step': 17754, 'epoch': 3} {'type': 'loss', 'content': 0.056297630071640015, 'timestamp': '2025-09-10 02:58:15.300649', 'step': 17755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:15.355779', 'step': 17755, 'epoch': 3} {'type': 'loss', 'content': 0.09388261288404465, 'timestamp': '2025-09-10 02:58:15.362156', 'step': 17756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:15.417268', 'step': 17756, 'epoch': 3} {'type': 'loss', 'content': 0.12976613640785217, 'timestamp': '2025-09-10 02:58:15.419647', 'step': 17757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:15.474236', 'step': 17757, 'epoch': 3} {'type': 'loss', 'content': 0.11935240030288696, 'timestamp': '2025-09-10 02:58:15.476437', 'step': 17758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:15.531968', 'step': 17758, 'epoch': 3} {'type': 'loss', 'content': 0.07413920760154724, 'timestamp': '2025-09-10 02:58:15.534315', 'step': 17759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:15.589643', 'step': 17759, 'epoch': 3} {'type': 'loss', 'content': 0.11715390533208847, 'timestamp': '2025-09-10 02:58:15.596121', 'step': 17760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:15.649634', 'step': 17760, 'epoch': 3} {'type': 'loss', 'content': 0.15962183475494385, 'timestamp': '2025-09-10 02:58:15.651899', 'step': 17761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:15.706129', 'step': 17761, 'epoch': 3} {'type': 'loss', 'content': 0.08431839942932129, 'timestamp': '2025-09-10 02:58:15.708410', 'step': 17762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:15.763403', 'step': 17762, 'epoch': 3} {'type': 'loss', 'content': 0.07849232852458954, 'timestamp': '2025-09-10 02:58:15.765414', 'step': 17763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:15.819660', 'step': 17763, 'epoch': 3} {'type': 'loss', 'content': 0.14140185713768005, 'timestamp': '2025-09-10 02:58:15.825698', 'step': 17764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:15.878908', 'step': 17764, 'epoch': 3} {'type': 'loss', 'content': 0.03681362047791481, 'timestamp': '2025-09-10 02:58:15.881097', 'step': 17765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:15.934895', 'step': 17765, 'epoch': 3} {'type': 'loss', 'content': 0.08256170153617859, 'timestamp': '2025-09-10 02:58:15.937101', 'step': 17766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:15.992649', 'step': 17766, 'epoch': 3} {'type': 'loss', 'content': 0.0425659604370594, 'timestamp': '2025-09-10 02:58:15.994781', 'step': 17767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:16.049513', 'step': 17767, 'epoch': 3} {'type': 'loss', 'content': 0.05113854631781578, 'timestamp': '2025-09-10 02:58:16.055801', 'step': 17768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:16.121469', 'step': 17768, 'epoch': 3} {'type': 'loss', 'content': 0.06859274208545685, 'timestamp': '2025-09-10 02:58:16.123592', 'step': 17769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:16.179696', 'step': 17769, 'epoch': 3} {'type': 'loss', 'content': 0.05843393877148628, 'timestamp': '2025-09-10 02:58:16.181994', 'step': 17770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:16.236487', 'step': 17770, 'epoch': 3} {'type': 'loss', 'content': 0.054175764322280884, 'timestamp': '2025-09-10 02:58:16.238671', 'step': 17771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:16.293338', 'step': 17771, 'epoch': 3} {'type': 'loss', 'content': 0.07663583755493164, 'timestamp': '2025-09-10 02:58:16.299751', 'step': 17772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:16.353888', 'step': 17772, 'epoch': 3} {'type': 'loss', 'content': 0.06856953352689743, 'timestamp': '2025-09-10 02:58:16.356212', 'step': 17773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:16.411298', 'step': 17773, 'epoch': 3} {'type': 'loss', 'content': 0.06968369334936142, 'timestamp': '2025-09-10 02:58:16.413572', 'step': 17774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:16.468251', 'step': 17774, 'epoch': 3} {'type': 'loss', 'content': 0.09699384868144989, 'timestamp': '2025-09-10 02:58:16.470604', 'step': 17775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:16.526001', 'step': 17775, 'epoch': 3} {'type': 'loss', 'content': 0.061104077845811844, 'timestamp': '2025-09-10 02:58:16.532433', 'step': 17776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:16.586669', 'step': 17776, 'epoch': 3} {'type': 'loss', 'content': 0.11593249440193176, 'timestamp': '2025-09-10 02:58:16.588876', 'step': 17777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:16.644551', 'step': 17777, 'epoch': 3} {'type': 'loss', 'content': 0.1021837368607521, 'timestamp': '2025-09-10 02:58:16.646872', 'step': 17778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:16.701942', 'step': 17778, 'epoch': 3} {'type': 'loss', 'content': 0.21139752864837646, 'timestamp': '2025-09-10 02:58:16.704158', 'step': 17779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:16.760114', 'step': 17779, 'epoch': 3} {'type': 'loss', 'content': 0.1441858559846878, 'timestamp': '2025-09-10 02:58:16.766260', 'step': 17780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:16.820874', 'step': 17780, 'epoch': 3} {'type': 'loss', 'content': 0.08374590426683426, 'timestamp': '2025-09-10 02:58:16.823177', 'step': 17781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:16.878275', 'step': 17781, 'epoch': 3} {'type': 'loss', 'content': 0.1381547600030899, 'timestamp': '2025-09-10 02:58:16.880769', 'step': 17782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:16.935942', 'step': 17782, 'epoch': 3} {'type': 'loss', 'content': 0.011459475383162498, 'timestamp': '2025-09-10 02:58:16.938265', 'step': 17783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:16.993433', 'step': 17783, 'epoch': 3} {'type': 'loss', 'content': 0.03603334724903107, 'timestamp': '2025-09-10 02:58:16.999813', 'step': 17784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:17.055097', 'step': 17784, 'epoch': 3} {'type': 'loss', 'content': 0.0621449276804924, 'timestamp': '2025-09-10 02:58:17.057223', 'step': 17785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:17.114021', 'step': 17785, 'epoch': 3} {'type': 'loss', 'content': 0.09107297658920288, 'timestamp': '2025-09-10 02:58:17.116401', 'step': 17786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:17.171074', 'step': 17786, 'epoch': 3} {'type': 'loss', 'content': 0.05753041058778763, 'timestamp': '2025-09-10 02:58:17.173185', 'step': 17787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:17.228050', 'step': 17787, 'epoch': 3} {'type': 'loss', 'content': 0.08474210649728775, 'timestamp': '2025-09-10 02:58:17.234526', 'step': 17788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:58:17.288706', 'step': 17788, 'epoch': 3} {'type': 'loss', 'content': 0.15637902915477753, 'timestamp': '2025-09-10 02:58:17.291071', 'step': 17789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:17.347723', 'step': 17789, 'epoch': 3} {'type': 'loss', 'content': 0.05566421523690224, 'timestamp': '2025-09-10 02:58:17.349751', 'step': 17790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:17.406065', 'step': 17790, 'epoch': 3} {'type': 'loss', 'content': 0.08242015540599823, 'timestamp': '2025-09-10 02:58:17.408369', 'step': 17791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:17.471766', 'step': 17791, 'epoch': 3} {'type': 'loss', 'content': 0.0510973297059536, 'timestamp': '2025-09-10 02:58:17.478020', 'step': 17792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:17.531513', 'step': 17792, 'epoch': 3} {'type': 'loss', 'content': 0.15990301966667175, 'timestamp': '2025-09-10 02:58:17.533521', 'step': 17793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:17.588434', 'step': 17793, 'epoch': 3} {'type': 'loss', 'content': 0.06396675109863281, 'timestamp': '2025-09-10 02:58:17.590488', 'step': 17794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:58:17.645372', 'step': 17794, 'epoch': 3} {'type': 'loss', 'content': 0.09661190211772919, 'timestamp': '2025-09-10 02:58:17.647463', 'step': 17795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:17.701867', 'step': 17795, 'epoch': 3} {'type': 'loss', 'content': 0.14492899179458618, 'timestamp': '2025-09-10 02:58:17.708379', 'step': 17796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:17.763429', 'step': 17796, 'epoch': 3} {'type': 'loss', 'content': 0.15171664953231812, 'timestamp': '2025-09-10 02:58:17.765650', 'step': 17797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:17.820240', 'step': 17797, 'epoch': 3} {'type': 'loss', 'content': 0.11402393877506256, 'timestamp': '2025-09-10 02:58:17.822431', 'step': 17798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:17.882925', 'step': 17798, 'epoch': 3} {'type': 'loss', 'content': 0.09367278218269348, 'timestamp': '2025-09-10 02:58:17.885080', 'step': 17799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:17.944831', 'step': 17799, 'epoch': 3} {'type': 'loss', 'content': 0.09917471557855606, 'timestamp': '2025-09-10 02:58:17.951087', 'step': 17800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:58:18.005369', 'step': 17800, 'epoch': 3} {'type': 'loss', 'content': 0.07212819904088974, 'timestamp': '2025-09-10 02:58:18.007726', 'step': 17801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:18.062488', 'step': 17801, 'epoch': 3} {'type': 'loss', 'content': 0.15638618171215057, 'timestamp': '2025-09-10 02:58:18.064841', 'step': 17802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:18.122194', 'step': 17802, 'epoch': 3} {'type': 'loss', 'content': 0.072190061211586, 'timestamp': '2025-09-10 02:58:18.125534', 'step': 17803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:18.181142', 'step': 17803, 'epoch': 3} {'type': 'loss', 'content': 0.17367085814476013, 'timestamp': '2025-09-10 02:58:18.187563', 'step': 17804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:18.242114', 'step': 17804, 'epoch': 3} {'type': 'loss', 'content': 0.0516052283346653, 'timestamp': '2025-09-10 02:58:18.244240', 'step': 17805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:18.299207', 'step': 17805, 'epoch': 3} {'type': 'loss', 'content': 0.041708845645189285, 'timestamp': '2025-09-10 02:58:18.301498', 'step': 17806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:18.356646', 'step': 17806, 'epoch': 3} {'type': 'loss', 'content': 0.10692010819911957, 'timestamp': '2025-09-10 02:58:18.358657', 'step': 17807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:18.413139', 'step': 17807, 'epoch': 3} {'type': 'loss', 'content': 0.09448632597923279, 'timestamp': '2025-09-10 02:58:18.419483', 'step': 17808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:18.473915', 'step': 17808, 'epoch': 3} {'type': 'loss', 'content': 0.08882499486207962, 'timestamp': '2025-09-10 02:58:18.476111', 'step': 17809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:18.530653', 'step': 17809, 'epoch': 3} {'type': 'loss', 'content': 0.0782785415649414, 'timestamp': '2025-09-10 02:58:18.532998', 'step': 17810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:18.590660', 'step': 17810, 'epoch': 3} {'type': 'loss', 'content': 0.08147258311510086, 'timestamp': '2025-09-10 02:58:18.592815', 'step': 17811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:58:18.648439', 'step': 17811, 'epoch': 3} {'type': 'loss', 'content': 0.04782106354832649, 'timestamp': '2025-09-10 02:58:18.654771', 'step': 17812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:18.709644', 'step': 17812, 'epoch': 3} {'type': 'loss', 'content': 0.06322021782398224, 'timestamp': '2025-09-10 02:58:18.711718', 'step': 17813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:18.768051', 'step': 17813, 'epoch': 3} {'type': 'loss', 'content': 0.08157225698232651, 'timestamp': '2025-09-10 02:58:18.770118', 'step': 17814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:18.824795', 'step': 17814, 'epoch': 3} {'type': 'loss', 'content': 0.07119645178318024, 'timestamp': '2025-09-10 02:58:18.826848', 'step': 17815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:58:18.892501', 'step': 17815, 'epoch': 3} {'type': 'loss', 'content': 0.11945348232984543, 'timestamp': '2025-09-10 02:58:18.898847', 'step': 17816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:18.953215', 'step': 17816, 'epoch': 3} {'type': 'loss', 'content': 0.05116865411400795, 'timestamp': '2025-09-10 02:58:18.955400', 'step': 17817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:19.010797', 'step': 17817, 'epoch': 3} {'type': 'loss', 'content': 0.07412711530923843, 'timestamp': '2025-09-10 02:58:19.013007', 'step': 17818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:19.072011', 'step': 17818, 'epoch': 3} {'type': 'loss', 'content': 0.19944077730178833, 'timestamp': '2025-09-10 02:58:19.074461', 'step': 17819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:19.135948', 'step': 17819, 'epoch': 3} {'type': 'loss', 'content': 0.06681444495916367, 'timestamp': '2025-09-10 02:58:19.143999', 'step': 17820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:19.203335', 'step': 17820, 'epoch': 3} {'type': 'loss', 'content': 0.16485945880413055, 'timestamp': '2025-09-10 02:58:19.206120', 'step': 17821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:19.267909', 'step': 17821, 'epoch': 3} {'type': 'loss', 'content': 0.04853899031877518, 'timestamp': '2025-09-10 02:58:19.270023', 'step': 17822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:19.326178', 'step': 17822, 'epoch': 3} {'type': 'loss', 'content': 0.1386253535747528, 'timestamp': '2025-09-10 02:58:19.330511', 'step': 17823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:19.390975', 'step': 17823, 'epoch': 3} {'type': 'loss', 'content': 0.018495837226510048, 'timestamp': '2025-09-10 02:58:19.403792', 'step': 17824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:19.463482', 'step': 17824, 'epoch': 3} {'type': 'loss', 'content': 0.07089181989431381, 'timestamp': '2025-09-10 02:58:19.467093', 'step': 17825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:19.525611', 'step': 17825, 'epoch': 3} {'type': 'loss', 'content': 0.20046286284923553, 'timestamp': '2025-09-10 02:58:19.529172', 'step': 17826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:19.589119', 'step': 17826, 'epoch': 3} {'type': 'loss', 'content': 0.0793800875544548, 'timestamp': '2025-09-10 02:58:19.591402', 'step': 17827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:19.646951', 'step': 17827, 'epoch': 3} {'type': 'loss', 'content': 0.10867110639810562, 'timestamp': '2025-09-10 02:58:19.653985', 'step': 17828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:19.711558', 'step': 17828, 'epoch': 3} {'type': 'loss', 'content': 0.05998736247420311, 'timestamp': '2025-09-10 02:58:19.713874', 'step': 17829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:19.769694', 'step': 17829, 'epoch': 3} {'type': 'loss', 'content': 0.04258253425359726, 'timestamp': '2025-09-10 02:58:19.771874', 'step': 17830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:19.827234', 'step': 17830, 'epoch': 3} {'type': 'loss', 'content': 0.05494249612092972, 'timestamp': '2025-09-10 02:58:19.829402', 'step': 17831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:19.884550', 'step': 17831, 'epoch': 3} {'type': 'loss', 'content': 0.11701210588216782, 'timestamp': '2025-09-10 02:58:19.892768', 'step': 17832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:19.949662', 'step': 17832, 'epoch': 3} {'type': 'loss', 'content': 0.07628222554922104, 'timestamp': '2025-09-10 02:58:19.952090', 'step': 17833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:20.007633', 'step': 17833, 'epoch': 3} {'type': 'loss', 'content': 0.06079233065247536, 'timestamp': '2025-09-10 02:58:20.010350', 'step': 17834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:20.065736', 'step': 17834, 'epoch': 3} {'type': 'loss', 'content': 0.08685008436441422, 'timestamp': '2025-09-10 02:58:20.067853', 'step': 17835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:20.129308', 'step': 17835, 'epoch': 3} {'type': 'loss', 'content': 0.07154452800750732, 'timestamp': '2025-09-10 02:58:20.135738', 'step': 17836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:20.190172', 'step': 17836, 'epoch': 3} {'type': 'loss', 'content': 0.05490061640739441, 'timestamp': '2025-09-10 02:58:20.192465', 'step': 17837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:20.249420', 'step': 17837, 'epoch': 3} {'type': 'loss', 'content': 0.08021871000528336, 'timestamp': '2025-09-10 02:58:20.251970', 'step': 17838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:20.309099', 'step': 17838, 'epoch': 3} {'type': 'loss', 'content': 0.1200496256351471, 'timestamp': '2025-09-10 02:58:20.311466', 'step': 17839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:20.367335', 'step': 17839, 'epoch': 3} {'type': 'loss', 'content': 0.15244734287261963, 'timestamp': '2025-09-10 02:58:20.373917', 'step': 17840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:20.430351', 'step': 17840, 'epoch': 3} {'type': 'loss', 'content': 0.15622852742671967, 'timestamp': '2025-09-10 02:58:20.432617', 'step': 17841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:20.490672', 'step': 17841, 'epoch': 3} {'type': 'loss', 'content': 0.03750027343630791, 'timestamp': '2025-09-10 02:58:20.492826', 'step': 17842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:20.551537', 'step': 17842, 'epoch': 3} {'type': 'loss', 'content': 0.08918058127164841, 'timestamp': '2025-09-10 02:58:20.553767', 'step': 17843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:58:20.608921', 'step': 17843, 'epoch': 3} {'type': 'loss', 'content': 0.0836181715130806, 'timestamp': '2025-09-10 02:58:20.615385', 'step': 17844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:20.669334', 'step': 17844, 'epoch': 3} {'type': 'loss', 'content': 0.12144792824983597, 'timestamp': '2025-09-10 02:58:20.671652', 'step': 17845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:58:20.727086', 'step': 17845, 'epoch': 3} {'type': 'loss', 'content': 0.11019115149974823, 'timestamp': '2025-09-10 02:58:20.729436', 'step': 17846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:20.785034', 'step': 17846, 'epoch': 3} {'type': 'loss', 'content': 0.028360962867736816, 'timestamp': '2025-09-10 02:58:20.787454', 'step': 17847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:20.843186', 'step': 17847, 'epoch': 3} {'type': 'loss', 'content': 0.07764507830142975, 'timestamp': '2025-09-10 02:58:20.849679', 'step': 17848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:20.904879', 'step': 17848, 'epoch': 3} {'type': 'loss', 'content': 0.13421319425106049, 'timestamp': '2025-09-10 02:58:20.906958', 'step': 17849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:20.962243', 'step': 17849, 'epoch': 3} {'type': 'loss', 'content': 0.07445240020751953, 'timestamp': '2025-09-10 02:58:20.964332', 'step': 17850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:21.019376', 'step': 17850, 'epoch': 3} {'type': 'loss', 'content': 0.09418743848800659, 'timestamp': '2025-09-10 02:58:21.021477', 'step': 17851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:21.076079', 'step': 17851, 'epoch': 3} {'type': 'loss', 'content': 0.12014380842447281, 'timestamp': '2025-09-10 02:58:21.082449', 'step': 17852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:21.139657', 'step': 17852, 'epoch': 3} {'type': 'loss', 'content': 0.09549710154533386, 'timestamp': '2025-09-10 02:58:21.142078', 'step': 17853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:21.196886', 'step': 17853, 'epoch': 3} {'type': 'loss', 'content': 0.15199898183345795, 'timestamp': '2025-09-10 02:58:21.199247', 'step': 17854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:21.254442', 'step': 17854, 'epoch': 3} {'type': 'loss', 'content': 0.0616406686604023, 'timestamp': '2025-09-10 02:58:21.256703', 'step': 17855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:58:21.311133', 'step': 17855, 'epoch': 3} {'type': 'loss', 'content': 0.15989018976688385, 'timestamp': '2025-09-10 02:58:21.317439', 'step': 17856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:21.370851', 'step': 17856, 'epoch': 3} {'type': 'loss', 'content': 0.18082399666309357, 'timestamp': '2025-09-10 02:58:21.372624', 'step': 17857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:21.425892', 'step': 17857, 'epoch': 3} {'type': 'loss', 'content': 0.15663549304008484, 'timestamp': '2025-09-10 02:58:21.428093', 'step': 17858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:21.481458', 'step': 17858, 'epoch': 3} {'type': 'loss', 'content': 0.12387401610612869, 'timestamp': '2025-09-10 02:58:21.483738', 'step': 17859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:21.538663', 'step': 17859, 'epoch': 3} {'type': 'loss', 'content': 0.12132634222507477, 'timestamp': '2025-09-10 02:58:21.545110', 'step': 17860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:21.601452', 'step': 17860, 'epoch': 3} {'type': 'loss', 'content': 0.08508861064910889, 'timestamp': '2025-09-10 02:58:21.603720', 'step': 17861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:21.657837', 'step': 17861, 'epoch': 3} {'type': 'loss', 'content': 0.07089792937040329, 'timestamp': '2025-09-10 02:58:21.660009', 'step': 17862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:21.714112', 'step': 17862, 'epoch': 3} {'type': 'loss', 'content': 0.0779399499297142, 'timestamp': '2025-09-10 02:58:21.716701', 'step': 17863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:21.771647', 'step': 17863, 'epoch': 3} {'type': 'loss', 'content': 0.06882868707180023, 'timestamp': '2025-09-10 02:58:21.777788', 'step': 17864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:21.831215', 'step': 17864, 'epoch': 3} {'type': 'loss', 'content': 0.08737032115459442, 'timestamp': '2025-09-10 02:58:21.833364', 'step': 17865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:21.887626', 'step': 17865, 'epoch': 3} {'type': 'loss', 'content': 0.07606286555528641, 'timestamp': '2025-09-10 02:58:21.889864', 'step': 17866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:21.963903', 'step': 17866, 'epoch': 3} {'type': 'loss', 'content': 0.06882646679878235, 'timestamp': '2025-09-10 02:58:21.965893', 'step': 17867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:22.020996', 'step': 17867, 'epoch': 3} {'type': 'loss', 'content': 0.07739345729351044, 'timestamp': '2025-09-10 02:58:22.027317', 'step': 17868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:22.082206', 'step': 17868, 'epoch': 3} {'type': 'loss', 'content': 0.052512917667627335, 'timestamp': '2025-09-10 02:58:22.084533', 'step': 17869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:22.139677', 'step': 17869, 'epoch': 3} {'type': 'loss', 'content': 0.061276406049728394, 'timestamp': '2025-09-10 02:58:22.142068', 'step': 17870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:22.202482', 'step': 17870, 'epoch': 3} {'type': 'loss', 'content': 0.10708153992891312, 'timestamp': '2025-09-10 02:58:22.204720', 'step': 17871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:22.260576', 'step': 17871, 'epoch': 3} {'type': 'loss', 'content': 0.19112297892570496, 'timestamp': '2025-09-10 02:58:22.266854', 'step': 17872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:22.322018', 'step': 17872, 'epoch': 3} {'type': 'loss', 'content': 0.0923105999827385, 'timestamp': '2025-09-10 02:58:22.324221', 'step': 17873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:22.380070', 'step': 17873, 'epoch': 3} {'type': 'loss', 'content': 0.1045050248503685, 'timestamp': '2025-09-10 02:58:22.382401', 'step': 17874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:22.437289', 'step': 17874, 'epoch': 3} {'type': 'loss', 'content': 0.05758751183748245, 'timestamp': '2025-09-10 02:58:22.439624', 'step': 17875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:22.494765', 'step': 17875, 'epoch': 3} {'type': 'loss', 'content': 0.07253605127334595, 'timestamp': '2025-09-10 02:58:22.501152', 'step': 17876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:22.556533', 'step': 17876, 'epoch': 3} {'type': 'loss', 'content': 0.10952062904834747, 'timestamp': '2025-09-10 02:58:22.558625', 'step': 17877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:22.613809', 'step': 17877, 'epoch': 3} {'type': 'loss', 'content': 0.1950448751449585, 'timestamp': '2025-09-10 02:58:22.616004', 'step': 17878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:22.670926', 'step': 17878, 'epoch': 3} {'type': 'loss', 'content': 0.09802199900150299, 'timestamp': '2025-09-10 02:58:22.673215', 'step': 17879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:22.728199', 'step': 17879, 'epoch': 3} {'type': 'loss', 'content': 0.12044253200292587, 'timestamp': '2025-09-10 02:58:22.734639', 'step': 17880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:22.789609', 'step': 17880, 'epoch': 3} {'type': 'loss', 'content': 0.11461273580789566, 'timestamp': '2025-09-10 02:58:22.791948', 'step': 17881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:22.847018', 'step': 17881, 'epoch': 3} {'type': 'loss', 'content': 0.11968781054019928, 'timestamp': '2025-09-10 02:58:22.849271', 'step': 17882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:22.904268', 'step': 17882, 'epoch': 3} {'type': 'loss', 'content': 0.127233624458313, 'timestamp': '2025-09-10 02:58:22.907872', 'step': 17883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:22.964550', 'step': 17883, 'epoch': 3} {'type': 'loss', 'content': 0.08921681344509125, 'timestamp': '2025-09-10 02:58:22.970679', 'step': 17884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:23.024629', 'step': 17884, 'epoch': 3} {'type': 'loss', 'content': 0.11295188963413239, 'timestamp': '2025-09-10 02:58:23.026663', 'step': 17885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:23.080442', 'step': 17885, 'epoch': 3} {'type': 'loss', 'content': 0.11400145292282104, 'timestamp': '2025-09-10 02:58:23.082431', 'step': 17886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:23.140832', 'step': 17886, 'epoch': 3} {'type': 'loss', 'content': 0.1012784019112587, 'timestamp': '2025-09-10 02:58:23.143192', 'step': 17887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:23.197514', 'step': 17887, 'epoch': 3} {'type': 'loss', 'content': 0.03848147392272949, 'timestamp': '2025-09-10 02:58:23.203838', 'step': 17888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:23.259034', 'step': 17888, 'epoch': 3} {'type': 'loss', 'content': 0.11484374105930328, 'timestamp': '2025-09-10 02:58:23.261081', 'step': 17889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:23.315648', 'step': 17889, 'epoch': 3} {'type': 'loss', 'content': 0.08900409936904907, 'timestamp': '2025-09-10 02:58:23.317712', 'step': 17890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:23.372811', 'step': 17890, 'epoch': 3} {'type': 'loss', 'content': 0.1478368192911148, 'timestamp': '2025-09-10 02:58:23.374867', 'step': 17891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:23.430301', 'step': 17891, 'epoch': 3} {'type': 'loss', 'content': 0.11811583489179611, 'timestamp': '2025-09-10 02:58:23.436392', 'step': 17892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:23.491916', 'step': 17892, 'epoch': 3} {'type': 'loss', 'content': 0.0990813747048378, 'timestamp': '2025-09-10 02:58:23.493956', 'step': 17893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:58:23.548904', 'step': 17893, 'epoch': 3} {'type': 'loss', 'content': 0.05446697026491165, 'timestamp': '2025-09-10 02:58:23.550902', 'step': 17894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:23.605636', 'step': 17894, 'epoch': 3} {'type': 'loss', 'content': 0.09022866934537888, 'timestamp': '2025-09-10 02:58:23.607612', 'step': 17895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:23.662134', 'step': 17895, 'epoch': 3} {'type': 'loss', 'content': 0.057637784630060196, 'timestamp': '2025-09-10 02:58:23.668477', 'step': 17896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:58:23.723234', 'step': 17896, 'epoch': 3} {'type': 'loss', 'content': 0.11944065243005753, 'timestamp': '2025-09-10 02:58:23.725474', 'step': 17897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:23.780036', 'step': 17897, 'epoch': 3} {'type': 'loss', 'content': 0.0456203818321228, 'timestamp': '2025-09-10 02:58:23.782195', 'step': 17898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:23.837084', 'step': 17898, 'epoch': 3} {'type': 'loss', 'content': 0.051073431968688965, 'timestamp': '2025-09-10 02:58:23.839520', 'step': 17899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:23.894214', 'step': 17899, 'epoch': 3} {'type': 'loss', 'content': 0.13162510097026825, 'timestamp': '2025-09-10 02:58:23.900593', 'step': 17900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:23.954596', 'step': 17900, 'epoch': 3} {'type': 'loss', 'content': 0.06964734941720963, 'timestamp': '2025-09-10 02:58:23.956860', 'step': 17901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:24.011752', 'step': 17901, 'epoch': 3} {'type': 'loss', 'content': 0.09224946051836014, 'timestamp': '2025-09-10 02:58:24.013823', 'step': 17902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:24.068873', 'step': 17902, 'epoch': 3} {'type': 'loss', 'content': 0.05137299373745918, 'timestamp': '2025-09-10 02:58:24.071205', 'step': 17903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:24.125924', 'step': 17903, 'epoch': 3} {'type': 'loss', 'content': 0.08264970779418945, 'timestamp': '2025-09-10 02:58:24.132331', 'step': 17904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:24.186465', 'step': 17904, 'epoch': 3} {'type': 'loss', 'content': 0.06824810057878494, 'timestamp': '2025-09-10 02:58:24.188812', 'step': 17905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:58:24.244213', 'step': 17905, 'epoch': 3} {'type': 'loss', 'content': 0.07536603510379791, 'timestamp': '2025-09-10 02:58:24.246487', 'step': 17906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:24.301296', 'step': 17906, 'epoch': 3} {'type': 'loss', 'content': 0.054264310747385025, 'timestamp': '2025-09-10 02:58:24.303688', 'step': 17907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:58:24.359010', 'step': 17907, 'epoch': 3} {'type': 'loss', 'content': 0.12998759746551514, 'timestamp': '2025-09-10 02:58:24.365399', 'step': 17908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:24.419103', 'step': 17908, 'epoch': 3} {'type': 'loss', 'content': 0.1392899751663208, 'timestamp': '2025-09-10 02:58:24.421337', 'step': 17909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:24.475728', 'step': 17909, 'epoch': 3} {'type': 'loss', 'content': 0.10780464857816696, 'timestamp': '2025-09-10 02:58:24.477582', 'step': 17910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:24.531710', 'step': 17910, 'epoch': 3} {'type': 'loss', 'content': 0.09205950051546097, 'timestamp': '2025-09-10 02:58:24.534367', 'step': 17911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:24.588715', 'step': 17911, 'epoch': 3} {'type': 'loss', 'content': 0.16872736811637878, 'timestamp': '2025-09-10 02:58:24.594879', 'step': 17912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:24.648287', 'step': 17912, 'epoch': 3} {'type': 'loss', 'content': 0.12509730458259583, 'timestamp': '2025-09-10 02:58:24.650432', 'step': 17913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:24.704667', 'step': 17913, 'epoch': 3} {'type': 'loss', 'content': 0.1218247041106224, 'timestamp': '2025-09-10 02:58:24.706838', 'step': 17914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:24.763858', 'step': 17914, 'epoch': 3} {'type': 'loss', 'content': 0.10828642547130585, 'timestamp': '2025-09-10 02:58:24.766098', 'step': 17915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:24.821025', 'step': 17915, 'epoch': 3} {'type': 'loss', 'content': 0.0914551243185997, 'timestamp': '2025-09-10 02:58:24.827415', 'step': 17916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:24.881995', 'step': 17916, 'epoch': 3} {'type': 'loss', 'content': 0.08144917339086533, 'timestamp': '2025-09-10 02:58:24.884346', 'step': 17917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:24.938759', 'step': 17917, 'epoch': 3} {'type': 'loss', 'content': 0.12445583194494247, 'timestamp': '2025-09-10 02:58:24.941052', 'step': 17918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:58:25.001860', 'step': 17918, 'epoch': 3} {'type': 'loss', 'content': 0.07339565455913544, 'timestamp': '2025-09-10 02:58:25.004182', 'step': 17919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:25.061402', 'step': 17919, 'epoch': 3} {'type': 'loss', 'content': 0.0637950599193573, 'timestamp': '2025-09-10 02:58:25.067499', 'step': 17920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:25.122525', 'step': 17920, 'epoch': 3} {'type': 'loss', 'content': 0.06386617571115494, 'timestamp': '2025-09-10 02:58:25.124810', 'step': 17921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:25.180050', 'step': 17921, 'epoch': 3} {'type': 'loss', 'content': 0.12895040214061737, 'timestamp': '2025-09-10 02:58:25.182311', 'step': 17922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:25.237374', 'step': 17922, 'epoch': 3} {'type': 'loss', 'content': 0.08586488664150238, 'timestamp': '2025-09-10 02:58:25.239651', 'step': 17923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:25.293682', 'step': 17923, 'epoch': 3} {'type': 'loss', 'content': 0.06662697345018387, 'timestamp': '2025-09-10 02:58:25.300115', 'step': 17924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:25.354090', 'step': 17924, 'epoch': 3} {'type': 'loss', 'content': 0.038258083164691925, 'timestamp': '2025-09-10 02:58:25.356327', 'step': 17925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:25.411873', 'step': 17925, 'epoch': 3} {'type': 'loss', 'content': 0.13006648421287537, 'timestamp': '2025-09-10 02:58:25.414219', 'step': 17926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:25.469259', 'step': 17926, 'epoch': 3} {'type': 'loss', 'content': 0.1186499297618866, 'timestamp': '2025-09-10 02:58:25.471423', 'step': 17927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:25.525376', 'step': 17927, 'epoch': 3} {'type': 'loss', 'content': 0.031964801251888275, 'timestamp': '2025-09-10 02:58:25.531492', 'step': 17928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:25.585436', 'step': 17928, 'epoch': 3} {'type': 'loss', 'content': 0.11807192862033844, 'timestamp': '2025-09-10 02:58:25.587702', 'step': 17929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:58:25.642404', 'step': 17929, 'epoch': 3} {'type': 'loss', 'content': 0.172713503241539, 'timestamp': '2025-09-10 02:58:25.644848', 'step': 17930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:25.699420', 'step': 17930, 'epoch': 3} {'type': 'loss', 'content': 0.05258641764521599, 'timestamp': '2025-09-10 02:58:25.701661', 'step': 17931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:25.757688', 'step': 17931, 'epoch': 3} {'type': 'loss', 'content': 0.012576458044350147, 'timestamp': '2025-09-10 02:58:25.764104', 'step': 17932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:25.818263', 'step': 17932, 'epoch': 3} {'type': 'loss', 'content': 0.06502504646778107, 'timestamp': '2025-09-10 02:58:25.820542', 'step': 17933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:25.875113', 'step': 17933, 'epoch': 3} {'type': 'loss', 'content': 0.03857358172535896, 'timestamp': '2025-09-10 02:58:25.877258', 'step': 17934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:25.931988', 'step': 17934, 'epoch': 3} {'type': 'loss', 'content': 0.01699654385447502, 'timestamp': '2025-09-10 02:58:25.934283', 'step': 17935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:25.988649', 'step': 17935, 'epoch': 3} {'type': 'loss', 'content': 0.08041784912347794, 'timestamp': '2025-09-10 02:58:25.994840', 'step': 17936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:26.048857', 'step': 17936, 'epoch': 3} {'type': 'loss', 'content': 0.069148488342762, 'timestamp': '2025-09-10 02:58:26.051025', 'step': 17937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:26.104984', 'step': 17937, 'epoch': 3} {'type': 'loss', 'content': 0.12098146229982376, 'timestamp': '2025-09-10 02:58:26.107373', 'step': 17938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:26.162420', 'step': 17938, 'epoch': 3} {'type': 'loss', 'content': 0.04024752601981163, 'timestamp': '2025-09-10 02:58:26.164457', 'step': 17939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:26.219111', 'step': 17939, 'epoch': 3} {'type': 'loss', 'content': 0.10069318115711212, 'timestamp': '2025-09-10 02:58:26.225327', 'step': 17940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:26.281422', 'step': 17940, 'epoch': 3} {'type': 'loss', 'content': 0.08975791931152344, 'timestamp': '2025-09-10 02:58:26.283836', 'step': 17941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:26.338352', 'step': 17941, 'epoch': 3} {'type': 'loss', 'content': 0.12262004613876343, 'timestamp': '2025-09-10 02:58:26.340430', 'step': 17942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:26.395914', 'step': 17942, 'epoch': 3} {'type': 'loss', 'content': 0.0996466800570488, 'timestamp': '2025-09-10 02:58:26.398010', 'step': 17943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:26.452527', 'step': 17943, 'epoch': 3} {'type': 'loss', 'content': 0.07685810327529907, 'timestamp': '2025-09-10 02:58:26.458998', 'step': 17944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:26.513079', 'step': 17944, 'epoch': 3} {'type': 'loss', 'content': 0.08429290354251862, 'timestamp': '2025-09-10 02:58:26.515329', 'step': 17945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:26.569406', 'step': 17945, 'epoch': 3} {'type': 'loss', 'content': 0.016633417457342148, 'timestamp': '2025-09-10 02:58:26.571421', 'step': 17946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:26.625918', 'step': 17946, 'epoch': 3} {'type': 'loss', 'content': 0.08087103068828583, 'timestamp': '2025-09-10 02:58:26.628159', 'step': 17947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:26.682523', 'step': 17947, 'epoch': 3} {'type': 'loss', 'content': 0.11211703717708588, 'timestamp': '2025-09-10 02:58:26.688723', 'step': 17948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:26.743638', 'step': 17948, 'epoch': 3} {'type': 'loss', 'content': 0.07476310431957245, 'timestamp': '2025-09-10 02:58:26.745718', 'step': 17949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:26.801177', 'step': 17949, 'epoch': 3} {'type': 'loss', 'content': 0.10467713326215744, 'timestamp': '2025-09-10 02:58:26.803493', 'step': 17950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:26.858213', 'step': 17950, 'epoch': 3} {'type': 'loss', 'content': 0.10576140880584717, 'timestamp': '2025-09-10 02:58:26.860448', 'step': 17951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:26.915141', 'step': 17951, 'epoch': 3} {'type': 'loss', 'content': 0.08310031145811081, 'timestamp': '2025-09-10 02:58:26.921493', 'step': 17952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:26.974800', 'step': 17952, 'epoch': 3} {'type': 'loss', 'content': 0.09550013393163681, 'timestamp': '2025-09-10 02:58:26.977152', 'step': 17953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:27.030901', 'step': 17953, 'epoch': 3} {'type': 'loss', 'content': 0.08216944336891174, 'timestamp': '2025-09-10 02:58:27.033330', 'step': 17954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:27.087450', 'step': 17954, 'epoch': 3} {'type': 'loss', 'content': 0.1079149916768074, 'timestamp': '2025-09-10 02:58:27.091294', 'step': 17955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:27.149603', 'step': 17955, 'epoch': 3} {'type': 'loss', 'content': 0.1267162710428238, 'timestamp': '2025-09-10 02:58:27.155671', 'step': 17956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:27.208914', 'step': 17956, 'epoch': 3} {'type': 'loss', 'content': 0.07250773906707764, 'timestamp': '2025-09-10 02:58:27.211180', 'step': 17957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:27.265810', 'step': 17957, 'epoch': 3} {'type': 'loss', 'content': 0.13315926492214203, 'timestamp': '2025-09-10 02:58:27.268362', 'step': 17958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:27.323723', 'step': 17958, 'epoch': 3} {'type': 'loss', 'content': 0.04321836307644844, 'timestamp': '2025-09-10 02:58:27.326221', 'step': 17959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:58:27.381353', 'step': 17959, 'epoch': 3} {'type': 'loss', 'content': 0.11088249087333679, 'timestamp': '2025-09-10 02:58:27.387795', 'step': 17960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:27.441321', 'step': 17960, 'epoch': 3} {'type': 'loss', 'content': 0.08267417550086975, 'timestamp': '2025-09-10 02:58:27.443572', 'step': 17961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:27.496948', 'step': 17961, 'epoch': 3} {'type': 'loss', 'content': 0.08472413569688797, 'timestamp': '2025-09-10 02:58:27.499243', 'step': 17962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:27.552856', 'step': 17962, 'epoch': 3} {'type': 'loss', 'content': 0.09789646416902542, 'timestamp': '2025-09-10 02:58:27.555285', 'step': 17963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:27.608912', 'step': 17963, 'epoch': 3} {'type': 'loss', 'content': 0.10358788818120956, 'timestamp': '2025-09-10 02:58:27.615020', 'step': 17964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:27.667831', 'step': 17964, 'epoch': 3} {'type': 'loss', 'content': 0.20440693199634552, 'timestamp': '2025-09-10 02:58:27.670190', 'step': 17965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:27.723784', 'step': 17965, 'epoch': 3} {'type': 'loss', 'content': 0.05975880101323128, 'timestamp': '2025-09-10 02:58:27.725991', 'step': 17966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:27.780287', 'step': 17966, 'epoch': 3} {'type': 'loss', 'content': 0.14215190708637238, 'timestamp': '2025-09-10 02:58:27.782488', 'step': 17967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:58:27.835865', 'step': 17967, 'epoch': 3} {'type': 'loss', 'content': 0.08536054939031601, 'timestamp': '2025-09-10 02:58:27.842238', 'step': 17968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:27.895395', 'step': 17968, 'epoch': 3} {'type': 'loss', 'content': 0.06044897809624672, 'timestamp': '2025-09-10 02:58:27.897756', 'step': 17969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:27.951416', 'step': 17969, 'epoch': 3} {'type': 'loss', 'content': 0.04757585749030113, 'timestamp': '2025-09-10 02:58:27.953717', 'step': 17970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:28.007858', 'step': 17970, 'epoch': 3} {'type': 'loss', 'content': 0.162458598613739, 'timestamp': '2025-09-10 02:58:28.010187', 'step': 17971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:28.063799', 'step': 17971, 'epoch': 3} {'type': 'loss', 'content': 0.09150867164134979, 'timestamp': '2025-09-10 02:58:28.069928', 'step': 17972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:28.122656', 'step': 17972, 'epoch': 3} {'type': 'loss', 'content': 0.12116730213165283, 'timestamp': '2025-09-10 02:58:28.124879', 'step': 17973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:28.177877', 'step': 17973, 'epoch': 3} {'type': 'loss', 'content': 0.11198610812425613, 'timestamp': '2025-09-10 02:58:28.180192', 'step': 17974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:28.234354', 'step': 17974, 'epoch': 3} {'type': 'loss', 'content': 0.043123673647642136, 'timestamp': '2025-09-10 02:58:28.236595', 'step': 17975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:28.292280', 'step': 17975, 'epoch': 3} {'type': 'loss', 'content': 0.1510423868894577, 'timestamp': '2025-09-10 02:58:28.298503', 'step': 17976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:28.352155', 'step': 17976, 'epoch': 3} {'type': 'loss', 'content': 0.04926737770438194, 'timestamp': '2025-09-10 02:58:28.354663', 'step': 17977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:28.409602', 'step': 17977, 'epoch': 3} {'type': 'loss', 'content': 0.15758474171161652, 'timestamp': '2025-09-10 02:58:28.411922', 'step': 17978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:28.466684', 'step': 17978, 'epoch': 3} {'type': 'loss', 'content': 0.09918317943811417, 'timestamp': '2025-09-10 02:58:28.469020', 'step': 17979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:28.527137', 'step': 17979, 'epoch': 3} {'type': 'loss', 'content': 0.11902378499507904, 'timestamp': '2025-09-10 02:58:28.533372', 'step': 17980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:28.588342', 'step': 17980, 'epoch': 3} {'type': 'loss', 'content': 0.015285775996744633, 'timestamp': '2025-09-10 02:58:28.590621', 'step': 17981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:28.644839', 'step': 17981, 'epoch': 3} {'type': 'loss', 'content': 0.05759832635521889, 'timestamp': '2025-09-10 02:58:28.647230', 'step': 17982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:28.701420', 'step': 17982, 'epoch': 3} {'type': 'loss', 'content': 0.14168958365917206, 'timestamp': '2025-09-10 02:58:28.703570', 'step': 17983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:28.757770', 'step': 17983, 'epoch': 3} {'type': 'loss', 'content': 0.10045572370290756, 'timestamp': '2025-09-10 02:58:28.763956', 'step': 17984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:28.819120', 'step': 17984, 'epoch': 3} {'type': 'loss', 'content': 0.05433255061507225, 'timestamp': '2025-09-10 02:58:28.821410', 'step': 17985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:28.879113', 'step': 17985, 'epoch': 3} {'type': 'loss', 'content': 0.06819439679384232, 'timestamp': '2025-09-10 02:58:28.881419', 'step': 17986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:28.937032', 'step': 17986, 'epoch': 3} {'type': 'loss', 'content': 0.056818027049303055, 'timestamp': '2025-09-10 02:58:28.939737', 'step': 17987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:28.995223', 'step': 17987, 'epoch': 3} {'type': 'loss', 'content': 0.07150673866271973, 'timestamp': '2025-09-10 02:58:29.001417', 'step': 17988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:29.054700', 'step': 17988, 'epoch': 3} {'type': 'loss', 'content': 0.16986146569252014, 'timestamp': '2025-09-10 02:58:29.057065', 'step': 17989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:29.121589', 'step': 17989, 'epoch': 3} {'type': 'loss', 'content': 0.10780543833971024, 'timestamp': '2025-09-10 02:58:29.123811', 'step': 17990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:29.179743', 'step': 17990, 'epoch': 3} {'type': 'loss', 'content': 0.07766739279031754, 'timestamp': '2025-09-10 02:58:29.182054', 'step': 17991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:29.235952', 'step': 17991, 'epoch': 3} {'type': 'loss', 'content': 0.09927862882614136, 'timestamp': '2025-09-10 02:58:29.242135', 'step': 17992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:29.295631', 'step': 17992, 'epoch': 3} {'type': 'loss', 'content': 0.11641717702150345, 'timestamp': '2025-09-10 02:58:29.298229', 'step': 17993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:29.352895', 'step': 17993, 'epoch': 3} {'type': 'loss', 'content': 0.06835869699716568, 'timestamp': '2025-09-10 02:58:29.355190', 'step': 17994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:29.411561', 'step': 17994, 'epoch': 3} {'type': 'loss', 'content': 0.0862019956111908, 'timestamp': '2025-09-10 02:58:29.413845', 'step': 17995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:29.468021', 'step': 17995, 'epoch': 3} {'type': 'loss', 'content': 0.03368433192372322, 'timestamp': '2025-09-10 02:58:29.474251', 'step': 17996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:29.528362', 'step': 17996, 'epoch': 3} {'type': 'loss', 'content': 0.11011974513530731, 'timestamp': '2025-09-10 02:58:29.530521', 'step': 17997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:29.585032', 'step': 17997, 'epoch': 3} {'type': 'loss', 'content': 0.04414349049329758, 'timestamp': '2025-09-10 02:58:29.587704', 'step': 17998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:29.643647', 'step': 17998, 'epoch': 3} {'type': 'loss', 'content': 0.062097977846860886, 'timestamp': '2025-09-10 02:58:29.645732', 'step': 17999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:29.710815', 'step': 17999, 'epoch': 3} {'type': 'loss', 'content': 0.23415176570415497, 'timestamp': '2025-09-10 02:58:29.717154', 'step': 18000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 18000', 'timestamp': '2025-09-10 02:58:30.075843', 'step': 18000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:30.135557', 'step': 18000, 'epoch': 3} {'type': 'loss', 'content': 0.04764313995838165, 'timestamp': '2025-09-10 02:58:30.138170', 'step': 18001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:30.195639', 'step': 18001, 'epoch': 3} {'type': 'loss', 'content': 0.13092471659183502, 'timestamp': '2025-09-10 02:58:30.197896', 'step': 18002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:30.253624', 'step': 18002, 'epoch': 3} {'type': 'loss', 'content': 0.1476648598909378, 'timestamp': '2025-09-10 02:58:30.255921', 'step': 18003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:30.312140', 'step': 18003, 'epoch': 3} {'type': 'loss', 'content': 0.06627397239208221, 'timestamp': '2025-09-10 02:58:30.318564', 'step': 18004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:58:30.372774', 'step': 18004, 'epoch': 3} {'type': 'loss', 'content': 0.036809444427490234, 'timestamp': '2025-09-10 02:58:30.375047', 'step': 18005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:30.429218', 'step': 18005, 'epoch': 3} {'type': 'loss', 'content': 0.0835130512714386, 'timestamp': '2025-09-10 02:58:30.431451', 'step': 18006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:30.486103', 'step': 18006, 'epoch': 3} {'type': 'loss', 'content': 0.04762575775384903, 'timestamp': '2025-09-10 02:58:30.488306', 'step': 18007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:30.542681', 'step': 18007, 'epoch': 3} {'type': 'loss', 'content': 0.1130862832069397, 'timestamp': '2025-09-10 02:58:30.548950', 'step': 18008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:30.602619', 'step': 18008, 'epoch': 3} {'type': 'loss', 'content': 0.13763634860515594, 'timestamp': '2025-09-10 02:58:30.606004', 'step': 18009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:30.659628', 'step': 18009, 'epoch': 3} {'type': 'loss', 'content': 0.04700743779540062, 'timestamp': '2025-09-10 02:58:30.661949', 'step': 18010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:30.717230', 'step': 18010, 'epoch': 3} {'type': 'loss', 'content': 0.13816377520561218, 'timestamp': '2025-09-10 02:58:30.719502', 'step': 18011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:30.772627', 'step': 18011, 'epoch': 3} {'type': 'loss', 'content': 0.15134045481681824, 'timestamp': '2025-09-10 02:58:30.778876', 'step': 18012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:30.831508', 'step': 18012, 'epoch': 3} {'type': 'loss', 'content': 0.13836437463760376, 'timestamp': '2025-09-10 02:58:30.833735', 'step': 18013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:30.887758', 'step': 18013, 'epoch': 3} {'type': 'loss', 'content': 0.0772613137960434, 'timestamp': '2025-09-10 02:58:30.890016', 'step': 18014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:30.944052', 'step': 18014, 'epoch': 3} {'type': 'loss', 'content': 0.08381076902151108, 'timestamp': '2025-09-10 02:58:30.946078', 'step': 18015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:30.999970', 'step': 18015, 'epoch': 3} {'type': 'loss', 'content': 0.09041202813386917, 'timestamp': '2025-09-10 02:58:31.006318', 'step': 18016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:31.059731', 'step': 18016, 'epoch': 3} {'type': 'loss', 'content': 0.05727184936404228, 'timestamp': '2025-09-10 02:58:31.061935', 'step': 18017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:31.115883', 'step': 18017, 'epoch': 3} {'type': 'loss', 'content': 0.05346113443374634, 'timestamp': '2025-09-10 02:58:31.117910', 'step': 18018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:31.175742', 'step': 18018, 'epoch': 3} {'type': 'loss', 'content': 0.04826802760362625, 'timestamp': '2025-09-10 02:58:31.177956', 'step': 18019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:31.230364', 'step': 18019, 'epoch': 3} {'type': 'loss', 'content': 0.08209870010614395, 'timestamp': '2025-09-10 02:58:31.236218', 'step': 18020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:31.288745', 'step': 18020, 'epoch': 3} {'type': 'loss', 'content': 0.12180864810943604, 'timestamp': '2025-09-10 02:58:31.290756', 'step': 18021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:31.344155', 'step': 18021, 'epoch': 3} {'type': 'loss', 'content': 0.05989792197942734, 'timestamp': '2025-09-10 02:58:31.346151', 'step': 18022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:31.400424', 'step': 18022, 'epoch': 3} {'type': 'loss', 'content': 0.1259312927722931, 'timestamp': '2025-09-10 02:58:31.402453', 'step': 18023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:31.457332', 'step': 18023, 'epoch': 3} {'type': 'loss', 'content': 0.043073151260614395, 'timestamp': '2025-09-10 02:58:31.463292', 'step': 18024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:31.519481', 'step': 18024, 'epoch': 3} {'type': 'loss', 'content': 0.08497904986143112, 'timestamp': '2025-09-10 02:58:31.521730', 'step': 18025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:31.576279', 'step': 18025, 'epoch': 3} {'type': 'loss', 'content': 0.09747457504272461, 'timestamp': '2025-09-10 02:58:31.578674', 'step': 18026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:31.632687', 'step': 18026, 'epoch': 3} {'type': 'loss', 'content': 0.07943493872880936, 'timestamp': '2025-09-10 02:58:31.634926', 'step': 18027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:31.688966', 'step': 18027, 'epoch': 3} {'type': 'loss', 'content': 0.08137030899524689, 'timestamp': '2025-09-10 02:58:31.694939', 'step': 18028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:31.748095', 'step': 18028, 'epoch': 3} {'type': 'loss', 'content': 0.0366462767124176, 'timestamp': '2025-09-10 02:58:31.750305', 'step': 18029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:31.804156', 'step': 18029, 'epoch': 3} {'type': 'loss', 'content': 0.06536837667226791, 'timestamp': '2025-09-10 02:58:31.806249', 'step': 18030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:31.862548', 'step': 18030, 'epoch': 3} {'type': 'loss', 'content': 0.08691256493330002, 'timestamp': '2025-09-10 02:58:31.864712', 'step': 18031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:31.920379', 'step': 18031, 'epoch': 3} {'type': 'loss', 'content': 0.11592482775449753, 'timestamp': '2025-09-10 02:58:31.926590', 'step': 18032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:31.979810', 'step': 18032, 'epoch': 3} {'type': 'loss', 'content': 0.08250149339437485, 'timestamp': '2025-09-10 02:58:31.981943', 'step': 18033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:32.036342', 'step': 18033, 'epoch': 3} {'type': 'loss', 'content': 0.09794001281261444, 'timestamp': '2025-09-10 02:58:32.038446', 'step': 18034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:32.091657', 'step': 18034, 'epoch': 3} {'type': 'loss', 'content': 0.0802953913807869, 'timestamp': '2025-09-10 02:58:32.093685', 'step': 18035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:58:32.146909', 'step': 18035, 'epoch': 3} {'type': 'loss', 'content': 0.20783261954784393, 'timestamp': '2025-09-10 02:58:32.152787', 'step': 18036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:32.204896', 'step': 18036, 'epoch': 3} {'type': 'loss', 'content': 0.13727176189422607, 'timestamp': '2025-09-10 02:58:32.206972', 'step': 18037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:32.259845', 'step': 18037, 'epoch': 3} {'type': 'loss', 'content': 0.03930759057402611, 'timestamp': '2025-09-10 02:58:32.263081', 'step': 18038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:32.317761', 'step': 18038, 'epoch': 3} {'type': 'loss', 'content': 0.10759054124355316, 'timestamp': '2025-09-10 02:58:32.319952', 'step': 18039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:32.372886', 'step': 18039, 'epoch': 3} {'type': 'loss', 'content': 0.002981719560921192, 'timestamp': '2025-09-10 02:58:32.378861', 'step': 18040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:32.431011', 'step': 18040, 'epoch': 3} {'type': 'loss', 'content': 0.07694046944379807, 'timestamp': '2025-09-10 02:58:32.433069', 'step': 18041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:32.485649', 'step': 18041, 'epoch': 3} {'type': 'loss', 'content': 0.09483359009027481, 'timestamp': '2025-09-10 02:58:32.487770', 'step': 18042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:32.542218', 'step': 18042, 'epoch': 3} {'type': 'loss', 'content': 0.06446749716997147, 'timestamp': '2025-09-10 02:58:32.544252', 'step': 18043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:32.597123', 'step': 18043, 'epoch': 3} {'type': 'loss', 'content': 0.06559241563081741, 'timestamp': '2025-09-10 02:58:32.603069', 'step': 18044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:32.655659', 'step': 18044, 'epoch': 3} {'type': 'loss', 'content': 0.08087880909442902, 'timestamp': '2025-09-10 02:58:32.657672', 'step': 18045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:32.710663', 'step': 18045, 'epoch': 3} {'type': 'loss', 'content': 0.10044638812541962, 'timestamp': '2025-09-10 02:58:32.712789', 'step': 18046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:58:32.765803', 'step': 18046, 'epoch': 3} {'type': 'loss', 'content': 0.033689215779304504, 'timestamp': '2025-09-10 02:58:32.767811', 'step': 18047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:58:32.820903', 'step': 18047, 'epoch': 3} {'type': 'loss', 'content': 0.04749363660812378, 'timestamp': '2025-09-10 02:58:32.826721', 'step': 18048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:32.878967', 'step': 18048, 'epoch': 3} {'type': 'loss', 'content': 0.07442249357700348, 'timestamp': '2025-09-10 02:58:32.880964', 'step': 18049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:32.934834', 'step': 18049, 'epoch': 3} {'type': 'loss', 'content': 0.10544856637716293, 'timestamp': '2025-09-10 02:58:32.936852', 'step': 18050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:32.989522', 'step': 18050, 'epoch': 3} {'type': 'loss', 'content': 0.08575716614723206, 'timestamp': '2025-09-10 02:58:32.994650', 'step': 18051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:33.051699', 'step': 18051, 'epoch': 3} {'type': 'loss', 'content': 0.10295361280441284, 'timestamp': '2025-09-10 02:58:33.057512', 'step': 18052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:33.109849', 'step': 18052, 'epoch': 3} {'type': 'loss', 'content': 0.13058502972126007, 'timestamp': '2025-09-10 02:58:33.117944', 'step': 18053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:33.173681', 'step': 18053, 'epoch': 3} {'type': 'loss', 'content': 0.1109403595328331, 'timestamp': '2025-09-10 02:58:33.175925', 'step': 18054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:33.242372', 'step': 18054, 'epoch': 3} {'type': 'loss', 'content': 0.09647982567548752, 'timestamp': '2025-09-10 02:58:33.244642', 'step': 18055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:33.299693', 'step': 18055, 'epoch': 3} {'type': 'loss', 'content': 0.09904132038354874, 'timestamp': '2025-09-10 02:58:33.305937', 'step': 18056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:33.358750', 'step': 18056, 'epoch': 3} {'type': 'loss', 'content': 0.09044840931892395, 'timestamp': '2025-09-10 02:58:33.360828', 'step': 18057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:33.422766', 'step': 18057, 'epoch': 3} {'type': 'loss', 'content': 0.07543233782052994, 'timestamp': '2025-09-10 02:58:33.424787', 'step': 18058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:33.479976', 'step': 18058, 'epoch': 3} {'type': 'loss', 'content': 0.06270965188741684, 'timestamp': '2025-09-10 02:58:33.482108', 'step': 18059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:33.536845', 'step': 18059, 'epoch': 3} {'type': 'loss', 'content': 0.15776823461055756, 'timestamp': '2025-09-10 02:58:33.543022', 'step': 18060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:58:33.600190', 'step': 18060, 'epoch': 3} {'type': 'loss', 'content': 0.09774959087371826, 'timestamp': '2025-09-10 02:58:33.602248', 'step': 18061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:33.663113', 'step': 18061, 'epoch': 3} {'type': 'loss', 'content': 0.03369845449924469, 'timestamp': '2025-09-10 02:58:33.665301', 'step': 18062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:33.720191', 'step': 18062, 'epoch': 3} {'type': 'loss', 'content': 0.15602275729179382, 'timestamp': '2025-09-10 02:58:33.722222', 'step': 18063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:33.785520', 'step': 18063, 'epoch': 3} {'type': 'loss', 'content': 0.05314357578754425, 'timestamp': '2025-09-10 02:58:33.791684', 'step': 18064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:33.843885', 'step': 18064, 'epoch': 3} {'type': 'loss', 'content': 0.12763884663581848, 'timestamp': '2025-09-10 02:58:33.845943', 'step': 18065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:33.899439', 'step': 18065, 'epoch': 3} {'type': 'loss', 'content': 0.11886374652385712, 'timestamp': '2025-09-10 02:58:33.901478', 'step': 18066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:33.954301', 'step': 18066, 'epoch': 3} {'type': 'loss', 'content': 0.08687971532344818, 'timestamp': '2025-09-10 02:58:33.956372', 'step': 18067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:34.009583', 'step': 18067, 'epoch': 3} {'type': 'loss', 'content': 0.1118188351392746, 'timestamp': '2025-09-10 02:58:34.021472', 'step': 18068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:34.077139', 'step': 18068, 'epoch': 3} {'type': 'loss', 'content': 0.09280277788639069, 'timestamp': '2025-09-10 02:58:34.079323', 'step': 18069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:34.133095', 'step': 18069, 'epoch': 3} {'type': 'loss', 'content': 0.09774205088615417, 'timestamp': '2025-09-10 02:58:34.135147', 'step': 18070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:34.188098', 'step': 18070, 'epoch': 3} {'type': 'loss', 'content': 0.135486900806427, 'timestamp': '2025-09-10 02:58:34.190199', 'step': 18071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:34.246764', 'step': 18071, 'epoch': 3} {'type': 'loss', 'content': 0.06783659756183624, 'timestamp': '2025-09-10 02:58:34.255427', 'step': 18072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:34.310421', 'step': 18072, 'epoch': 3} {'type': 'loss', 'content': 0.10223347693681717, 'timestamp': '2025-09-10 02:58:34.312631', 'step': 18073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:34.366334', 'step': 18073, 'epoch': 3} {'type': 'loss', 'content': 0.09254307299852371, 'timestamp': '2025-09-10 02:58:34.368677', 'step': 18074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:34.425872', 'step': 18074, 'epoch': 3} {'type': 'loss', 'content': 0.0990348681807518, 'timestamp': '2025-09-10 02:58:34.427993', 'step': 18075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:34.481018', 'step': 18075, 'epoch': 3} {'type': 'loss', 'content': 0.0836745873093605, 'timestamp': '2025-09-10 02:58:34.487123', 'step': 18076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:34.543212', 'step': 18076, 'epoch': 3} {'type': 'loss', 'content': 0.09987970441579819, 'timestamp': '2025-09-10 02:58:34.545294', 'step': 18077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:34.598802', 'step': 18077, 'epoch': 3} {'type': 'loss', 'content': 0.1037219762802124, 'timestamp': '2025-09-10 02:58:34.600852', 'step': 18078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:34.653552', 'step': 18078, 'epoch': 3} {'type': 'loss', 'content': 0.06489012390375137, 'timestamp': '2025-09-10 02:58:34.655573', 'step': 18079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:34.708337', 'step': 18079, 'epoch': 3} {'type': 'loss', 'content': 0.13131839036941528, 'timestamp': '2025-09-10 02:58:34.714169', 'step': 18080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:58:34.768721', 'step': 18080, 'epoch': 3} {'type': 'loss', 'content': 0.12682011723518372, 'timestamp': '2025-09-10 02:58:34.770880', 'step': 18081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:34.824246', 'step': 18081, 'epoch': 3} {'type': 'loss', 'content': 0.08330094069242477, 'timestamp': '2025-09-10 02:58:34.826545', 'step': 18082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:34.879443', 'step': 18082, 'epoch': 3} {'type': 'loss', 'content': 0.12473730742931366, 'timestamp': '2025-09-10 02:58:34.881617', 'step': 18083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:34.936521', 'step': 18083, 'epoch': 3} {'type': 'loss', 'content': 0.14028377830982208, 'timestamp': '2025-09-10 02:58:34.942244', 'step': 18084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:34.994767', 'step': 18084, 'epoch': 3} {'type': 'loss', 'content': 0.10782727599143982, 'timestamp': '2025-09-10 02:58:34.997004', 'step': 18085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:35.050259', 'step': 18085, 'epoch': 3} {'type': 'loss', 'content': 0.11544639617204666, 'timestamp': '2025-09-10 02:58:35.052571', 'step': 18086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:35.105875', 'step': 18086, 'epoch': 3} {'type': 'loss', 'content': 0.16184090077877045, 'timestamp': '2025-09-10 02:58:35.107952', 'step': 18087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:35.163025', 'step': 18087, 'epoch': 3} {'type': 'loss', 'content': 0.15332156419754028, 'timestamp': '2025-09-10 02:58:35.168737', 'step': 18088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:35.221302', 'step': 18088, 'epoch': 3} {'type': 'loss', 'content': 0.05591244623064995, 'timestamp': '2025-09-10 02:58:35.223363', 'step': 18089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:35.277198', 'step': 18089, 'epoch': 3} {'type': 'loss', 'content': 0.06234683841466904, 'timestamp': '2025-09-10 02:58:35.279259', 'step': 18090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:35.332357', 'step': 18090, 'epoch': 3} {'type': 'loss', 'content': 0.05235711857676506, 'timestamp': '2025-09-10 02:58:35.334531', 'step': 18091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:35.387398', 'step': 18091, 'epoch': 3} {'type': 'loss', 'content': 0.14425742626190186, 'timestamp': '2025-09-10 02:58:35.393406', 'step': 18092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:35.445545', 'step': 18092, 'epoch': 3} {'type': 'loss', 'content': 0.10253211855888367, 'timestamp': '2025-09-10 02:58:35.447582', 'step': 18093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:35.500218', 'step': 18093, 'epoch': 3} {'type': 'loss', 'content': 0.13884156942367554, 'timestamp': '2025-09-10 02:58:35.502301', 'step': 18094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:58:35.556413', 'step': 18094, 'epoch': 3} {'type': 'loss', 'content': 0.12835057079792023, 'timestamp': '2025-09-10 02:58:35.558444', 'step': 18095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:35.611276', 'step': 18095, 'epoch': 3} {'type': 'loss', 'content': 0.09203220158815384, 'timestamp': '2025-09-10 02:58:35.616968', 'step': 18096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:35.671101', 'step': 18096, 'epoch': 3} {'type': 'loss', 'content': 0.09504429996013641, 'timestamp': '2025-09-10 02:58:35.674959', 'step': 18097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:35.733752', 'step': 18097, 'epoch': 3} {'type': 'loss', 'content': 0.10293027758598328, 'timestamp': '2025-09-10 02:58:35.737610', 'step': 18098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:35.791163', 'step': 18098, 'epoch': 3} {'type': 'loss', 'content': 0.1261855810880661, 'timestamp': '2025-09-10 02:58:35.793224', 'step': 18099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:35.846455', 'step': 18099, 'epoch': 3} {'type': 'loss', 'content': 0.05872717127203941, 'timestamp': '2025-09-10 02:58:35.852398', 'step': 18100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:35.904285', 'step': 18100, 'epoch': 3} {'type': 'loss', 'content': 0.06616586446762085, 'timestamp': '2025-09-10 02:58:35.906332', 'step': 18101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:35.958910', 'step': 18101, 'epoch': 3} {'type': 'loss', 'content': 0.02861698716878891, 'timestamp': '2025-09-10 02:58:35.963318', 'step': 18102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:36.019422', 'step': 18102, 'epoch': 3} {'type': 'loss', 'content': 0.051044169813394547, 'timestamp': '2025-09-10 02:58:36.021489', 'step': 18103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:36.077415', 'step': 18103, 'epoch': 3} {'type': 'loss', 'content': 0.07748696208000183, 'timestamp': '2025-09-10 02:58:36.083086', 'step': 18104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:36.136886', 'step': 18104, 'epoch': 3} {'type': 'loss', 'content': 0.1712760180234909, 'timestamp': '2025-09-10 02:58:36.138962', 'step': 18105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:36.197126', 'step': 18105, 'epoch': 3} {'type': 'loss', 'content': 0.0493507944047451, 'timestamp': '2025-09-10 02:58:36.199135', 'step': 18106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:36.253300', 'step': 18106, 'epoch': 3} {'type': 'loss', 'content': 0.07539185881614685, 'timestamp': '2025-09-10 02:58:36.255282', 'step': 18107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:36.308247', 'step': 18107, 'epoch': 3} {'type': 'loss', 'content': 0.10879373550415039, 'timestamp': '2025-09-10 02:58:36.314130', 'step': 18108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:36.367205', 'step': 18108, 'epoch': 3} {'type': 'loss', 'content': 0.05934477597475052, 'timestamp': '2025-09-10 02:58:36.369420', 'step': 18109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:36.427959', 'step': 18109, 'epoch': 3} {'type': 'loss', 'content': 0.03628157824277878, 'timestamp': '2025-09-10 02:58:36.430057', 'step': 18110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:36.482991', 'step': 18110, 'epoch': 3} {'type': 'loss', 'content': 0.06877846270799637, 'timestamp': '2025-09-10 02:58:36.485389', 'step': 18111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:36.539919', 'step': 18111, 'epoch': 3} {'type': 'loss', 'content': 0.03774002939462662, 'timestamp': '2025-09-10 02:58:36.550631', 'step': 18112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:36.610446', 'step': 18112, 'epoch': 3} {'type': 'loss', 'content': 0.039445873349905014, 'timestamp': '2025-09-10 02:58:36.612448', 'step': 18113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:58:36.664848', 'step': 18113, 'epoch': 3} {'type': 'loss', 'content': 0.11231156438589096, 'timestamp': '2025-09-10 02:58:36.666939', 'step': 18114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:36.720171', 'step': 18114, 'epoch': 3} {'type': 'loss', 'content': 0.0395013764500618, 'timestamp': '2025-09-10 02:58:36.722193', 'step': 18115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:36.774728', 'step': 18115, 'epoch': 3} {'type': 'loss', 'content': 0.07170432060956955, 'timestamp': '2025-09-10 02:58:36.780318', 'step': 18116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:36.833610', 'step': 18116, 'epoch': 3} {'type': 'loss', 'content': 0.0544656440615654, 'timestamp': '2025-09-10 02:58:36.835566', 'step': 18117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:58:36.889407', 'step': 18117, 'epoch': 3} {'type': 'loss', 'content': 0.04466117545962334, 'timestamp': '2025-09-10 02:58:36.891589', 'step': 18118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:36.944641', 'step': 18118, 'epoch': 3} {'type': 'loss', 'content': 0.09546218812465668, 'timestamp': '2025-09-10 02:58:36.946674', 'step': 18119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:37.000993', 'step': 18119, 'epoch': 3} {'type': 'loss', 'content': 0.058441177010536194, 'timestamp': '2025-09-10 02:58:37.006904', 'step': 18120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:37.059515', 'step': 18120, 'epoch': 3} {'type': 'loss', 'content': 0.0889110341668129, 'timestamp': '2025-09-10 02:58:37.061535', 'step': 18121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:37.114091', 'step': 18121, 'epoch': 3} {'type': 'loss', 'content': 0.11336036026477814, 'timestamp': '2025-09-10 02:58:37.116336', 'step': 18122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:37.170342', 'step': 18122, 'epoch': 3} {'type': 'loss', 'content': 0.12983623147010803, 'timestamp': '2025-09-10 02:58:37.173912', 'step': 18123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:37.228014', 'step': 18123, 'epoch': 3} {'type': 'loss', 'content': 0.14030525088310242, 'timestamp': '2025-09-10 02:58:37.233800', 'step': 18124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:37.285886', 'step': 18124, 'epoch': 3} {'type': 'loss', 'content': 0.05152449756860733, 'timestamp': '2025-09-10 02:58:37.288058', 'step': 18125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:37.341626', 'step': 18125, 'epoch': 3} {'type': 'loss', 'content': 0.09037143737077713, 'timestamp': '2025-09-10 02:58:37.344032', 'step': 18126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:58:37.413019', 'step': 18126, 'epoch': 3} {'type': 'loss', 'content': 0.11060884594917297, 'timestamp': '2025-09-10 02:58:37.415588', 'step': 18127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:37.474146', 'step': 18127, 'epoch': 3} {'type': 'loss', 'content': 0.07329735904932022, 'timestamp': '2025-09-10 02:58:37.480024', 'step': 18128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:37.536885', 'step': 18128, 'epoch': 3} {'type': 'loss', 'content': 0.048571351915597916, 'timestamp': '2025-09-10 02:58:37.538898', 'step': 18129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:37.594405', 'step': 18129, 'epoch': 3} {'type': 'loss', 'content': 0.13049782812595367, 'timestamp': '2025-09-10 02:58:37.598199', 'step': 18130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:37.654528', 'step': 18130, 'epoch': 3} {'type': 'loss', 'content': 0.10497841984033585, 'timestamp': '2025-09-10 02:58:37.656638', 'step': 18131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:37.712059', 'step': 18131, 'epoch': 3} {'type': 'loss', 'content': 0.07673173397779465, 'timestamp': '2025-09-10 02:58:37.717966', 'step': 18132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:37.770388', 'step': 18132, 'epoch': 3} {'type': 'loss', 'content': 0.08847447484731674, 'timestamp': '2025-09-10 02:58:37.772418', 'step': 18133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:37.825138', 'step': 18133, 'epoch': 3} {'type': 'loss', 'content': 0.0658843144774437, 'timestamp': '2025-09-10 02:58:37.827282', 'step': 18134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:37.880293', 'step': 18134, 'epoch': 3} {'type': 'loss', 'content': 0.09749050438404083, 'timestamp': '2025-09-10 02:58:37.882541', 'step': 18135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:37.936172', 'step': 18135, 'epoch': 3} {'type': 'loss', 'content': 0.07634072005748749, 'timestamp': '2025-09-10 02:58:37.941995', 'step': 18136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:37.995757', 'step': 18136, 'epoch': 3} {'type': 'loss', 'content': 0.1636020839214325, 'timestamp': '2025-09-10 02:58:37.997753', 'step': 18137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:38.050678', 'step': 18137, 'epoch': 3} {'type': 'loss', 'content': 0.07202832400798798, 'timestamp': '2025-09-10 02:58:38.052843', 'step': 18138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:38.106251', 'step': 18138, 'epoch': 3} {'type': 'loss', 'content': 0.08119241148233414, 'timestamp': '2025-09-10 02:58:38.108294', 'step': 18139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:38.160936', 'step': 18139, 'epoch': 3} {'type': 'loss', 'content': 0.08758164197206497, 'timestamp': '2025-09-10 02:58:38.167060', 'step': 18140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:38.220338', 'step': 18140, 'epoch': 3} {'type': 'loss', 'content': 0.10931660234928131, 'timestamp': '2025-09-10 02:58:38.222532', 'step': 18141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:38.275485', 'step': 18141, 'epoch': 3} {'type': 'loss', 'content': 0.10949291288852692, 'timestamp': '2025-09-10 02:58:38.277509', 'step': 18142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:38.331200', 'step': 18142, 'epoch': 3} {'type': 'loss', 'content': 0.12678588926792145, 'timestamp': '2025-09-10 02:58:38.333200', 'step': 18143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:58:38.386058', 'step': 18143, 'epoch': 3} {'type': 'loss', 'content': 0.09785611182451248, 'timestamp': '2025-09-10 02:58:38.391751', 'step': 18144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:38.446763', 'step': 18144, 'epoch': 3} {'type': 'loss', 'content': 0.03935403749346733, 'timestamp': '2025-09-10 02:58:38.449045', 'step': 18145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:38.502945', 'step': 18145, 'epoch': 3} {'type': 'loss', 'content': 0.06142214685678482, 'timestamp': '2025-09-10 02:58:38.505045', 'step': 18146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:38.559298', 'step': 18146, 'epoch': 3} {'type': 'loss', 'content': 0.18496255576610565, 'timestamp': '2025-09-10 02:58:38.561313', 'step': 18147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:38.614514', 'step': 18147, 'epoch': 3} {'type': 'loss', 'content': 0.20588037371635437, 'timestamp': '2025-09-10 02:58:38.620350', 'step': 18148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:38.672341', 'step': 18148, 'epoch': 3} {'type': 'loss', 'content': 0.05235997587442398, 'timestamp': '2025-09-10 02:58:38.674366', 'step': 18149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:38.730244', 'step': 18149, 'epoch': 3} {'type': 'loss', 'content': 0.08390368521213531, 'timestamp': '2025-09-10 02:58:38.732301', 'step': 18150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:58:38.788096', 'step': 18150, 'epoch': 3} {'type': 'loss', 'content': 0.07602535933256149, 'timestamp': '2025-09-10 02:58:38.790192', 'step': 18151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:38.843099', 'step': 18151, 'epoch': 3} {'type': 'loss', 'content': 0.07467912882566452, 'timestamp': '2025-09-10 02:58:38.848694', 'step': 18152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:58:38.903585', 'step': 18152, 'epoch': 3} {'type': 'loss', 'content': 0.03624293580651283, 'timestamp': '2025-09-10 02:58:38.905615', 'step': 18153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:38.958410', 'step': 18153, 'epoch': 3} {'type': 'loss', 'content': 0.050122760236263275, 'timestamp': '2025-09-10 02:58:38.960670', 'step': 18154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:58:39.016537', 'step': 18154, 'epoch': 3} {'type': 'loss', 'content': 0.20723317563533783, 'timestamp': '2025-09-10 02:58:39.019021', 'step': 18155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:39.072502', 'step': 18155, 'epoch': 3} {'type': 'loss', 'content': 0.06712768971920013, 'timestamp': '2025-09-10 02:58:39.078363', 'step': 18156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:39.135899', 'step': 18156, 'epoch': 3} {'type': 'loss', 'content': 0.12310238182544708, 'timestamp': '2025-09-10 02:58:39.137945', 'step': 18157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:39.195845', 'step': 18157, 'epoch': 3} {'type': 'loss', 'content': 0.1354767233133316, 'timestamp': '2025-09-10 02:58:39.197924', 'step': 18158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:39.251324', 'step': 18158, 'epoch': 3} {'type': 'loss', 'content': 0.10238314419984818, 'timestamp': '2025-09-10 02:58:39.253339', 'step': 18159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:39.307213', 'step': 18159, 'epoch': 3} {'type': 'loss', 'content': 0.08780019730329514, 'timestamp': '2025-09-10 02:58:39.312900', 'step': 18160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:39.365376', 'step': 18160, 'epoch': 3} {'type': 'loss', 'content': 0.04644640162587166, 'timestamp': '2025-09-10 02:58:39.367431', 'step': 18161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:39.426951', 'step': 18161, 'epoch': 3} {'type': 'loss', 'content': 0.16920706629753113, 'timestamp': '2025-09-10 02:58:39.429032', 'step': 18162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:39.485045', 'step': 18162, 'epoch': 3} {'type': 'loss', 'content': 0.03610178828239441, 'timestamp': '2025-09-10 02:58:39.487253', 'step': 18163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:39.540552', 'step': 18163, 'epoch': 3} {'type': 'loss', 'content': 0.11995463818311691, 'timestamp': '2025-09-10 02:58:39.546253', 'step': 18164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:39.602567', 'step': 18164, 'epoch': 3} {'type': 'loss', 'content': 0.07292353361845016, 'timestamp': '2025-09-10 02:58:39.604571', 'step': 18165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:39.658587', 'step': 18165, 'epoch': 3} {'type': 'loss', 'content': 0.11507605016231537, 'timestamp': '2025-09-10 02:58:39.660602', 'step': 18166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:39.716443', 'step': 18166, 'epoch': 3} {'type': 'loss', 'content': 0.055498190224170685, 'timestamp': '2025-09-10 02:58:39.736811', 'step': 18167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:39.792310', 'step': 18167, 'epoch': 3} {'type': 'loss', 'content': 0.17023804783821106, 'timestamp': '2025-09-10 02:58:39.798209', 'step': 18168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:39.851675', 'step': 18168, 'epoch': 3} {'type': 'loss', 'content': 0.11974463611841202, 'timestamp': '2025-09-10 02:58:39.857621', 'step': 18169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:39.916672', 'step': 18169, 'epoch': 3} {'type': 'loss', 'content': 0.04644244536757469, 'timestamp': '2025-09-10 02:58:39.919002', 'step': 18170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:39.971732', 'step': 18170, 'epoch': 3} {'type': 'loss', 'content': 0.04442170634865761, 'timestamp': '2025-09-10 02:58:39.973746', 'step': 18171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:40.026456', 'step': 18171, 'epoch': 3} {'type': 'loss', 'content': 0.07218734920024872, 'timestamp': '2025-09-10 02:58:40.032377', 'step': 18172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:40.085870', 'step': 18172, 'epoch': 3} {'type': 'loss', 'content': 0.043697506189346313, 'timestamp': '2025-09-10 02:58:40.087983', 'step': 18173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:40.141516', 'step': 18173, 'epoch': 3} {'type': 'loss', 'content': 0.08670806884765625, 'timestamp': '2025-09-10 02:58:40.144438', 'step': 18174, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:58:52.780377', 'step': 18174, 'epoch': 3} {'type': 'pplx', 'content': 11787.707233789786, 'timestamp': '2025-09-10 02:58:52.783407', 'step': 18174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:52.838642', 'step': 18174, 'epoch': 3} {'type': 'loss', 'content': 0.07798322290182114, 'timestamp': '2025-09-10 02:58:52.840583', 'step': 18175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:52.895610', 'step': 18175, 'epoch': 3} {'type': 'loss', 'content': 0.08871184289455414, 'timestamp': '2025-09-10 02:58:52.901792', 'step': 18176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:52.956015', 'step': 18176, 'epoch': 3} {'type': 'loss', 'content': 0.0685122162103653, 'timestamp': '2025-09-10 02:58:52.958130', 'step': 18177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:53.013919', 'step': 18177, 'epoch': 3} {'type': 'loss', 'content': 0.08490562438964844, 'timestamp': '2025-09-10 02:58:53.016236', 'step': 18178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:53.072239', 'step': 18178, 'epoch': 3} {'type': 'loss', 'content': 0.10127304494380951, 'timestamp': '2025-09-10 02:58:53.074389', 'step': 18179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:53.129378', 'step': 18179, 'epoch': 3} {'type': 'loss', 'content': 0.07559885084629059, 'timestamp': '2025-09-10 02:58:53.135746', 'step': 18180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:53.189833', 'step': 18180, 'epoch': 3} {'type': 'loss', 'content': 0.07057903707027435, 'timestamp': '2025-09-10 02:58:53.191963', 'step': 18181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:53.248935', 'step': 18181, 'epoch': 3} {'type': 'loss', 'content': 0.04368866607546806, 'timestamp': '2025-09-10 02:58:53.255194', 'step': 18182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:53.315047', 'step': 18182, 'epoch': 3} {'type': 'loss', 'content': 0.08414886146783829, 'timestamp': '2025-09-10 02:58:53.317056', 'step': 18183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:58:53.371357', 'step': 18183, 'epoch': 3} {'type': 'loss', 'content': 0.08606664091348648, 'timestamp': '2025-09-10 02:58:53.377436', 'step': 18184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:58:53.431717', 'step': 18184, 'epoch': 3} {'type': 'loss', 'content': 0.07753214985132217, 'timestamp': '2025-09-10 02:58:53.433786', 'step': 18185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:53.491937', 'step': 18185, 'epoch': 3} {'type': 'loss', 'content': 0.20027896761894226, 'timestamp': '2025-09-10 02:58:53.494100', 'step': 18186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:53.549469', 'step': 18186, 'epoch': 3} {'type': 'loss', 'content': 0.15966510772705078, 'timestamp': '2025-09-10 02:58:53.551524', 'step': 18187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:53.606668', 'step': 18187, 'epoch': 3} {'type': 'loss', 'content': 0.08614616096019745, 'timestamp': '2025-09-10 02:58:53.612812', 'step': 18188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:53.666412', 'step': 18188, 'epoch': 3} {'type': 'loss', 'content': 0.11124857515096664, 'timestamp': '2025-09-10 02:58:53.668492', 'step': 18189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:53.723657', 'step': 18189, 'epoch': 3} {'type': 'loss', 'content': 0.07918403297662735, 'timestamp': '2025-09-10 02:58:53.725666', 'step': 18190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:53.779850', 'step': 18190, 'epoch': 3} {'type': 'loss', 'content': 0.07303564250469208, 'timestamp': '2025-09-10 02:58:53.782059', 'step': 18191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:53.835147', 'step': 18191, 'epoch': 3} {'type': 'loss', 'content': 0.02182222157716751, 'timestamp': '2025-09-10 02:58:53.841554', 'step': 18192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:53.894710', 'step': 18192, 'epoch': 3} {'type': 'loss', 'content': 0.12513215839862823, 'timestamp': '2025-09-10 02:58:53.896967', 'step': 18193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:53.950404', 'step': 18193, 'epoch': 3} {'type': 'loss', 'content': 0.0900828018784523, 'timestamp': '2025-09-10 02:58:53.952459', 'step': 18194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:54.005150', 'step': 18194, 'epoch': 3} {'type': 'loss', 'content': 0.04103580489754677, 'timestamp': '2025-09-10 02:58:54.007243', 'step': 18195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:54.060315', 'step': 18195, 'epoch': 3} {'type': 'loss', 'content': 0.10811381042003632, 'timestamp': '2025-09-10 02:58:54.066235', 'step': 18196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:54.118489', 'step': 18196, 'epoch': 3} {'type': 'loss', 'content': 0.07419700175523758, 'timestamp': '2025-09-10 02:58:54.120629', 'step': 18197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:54.173419', 'step': 18197, 'epoch': 3} {'type': 'loss', 'content': 0.09131345897912979, 'timestamp': '2025-09-10 02:58:54.175498', 'step': 18198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:54.229086', 'step': 18198, 'epoch': 3} {'type': 'loss', 'content': 0.17307829856872559, 'timestamp': '2025-09-10 02:58:54.231198', 'step': 18199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:54.284875', 'step': 18199, 'epoch': 3} {'type': 'loss', 'content': 0.1019492894411087, 'timestamp': '2025-09-10 02:58:54.291061', 'step': 18200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:58:54.344819', 'step': 18200, 'epoch': 3} {'type': 'loss', 'content': 0.049682050943374634, 'timestamp': '2025-09-10 02:58:54.346873', 'step': 18201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:54.400128', 'step': 18201, 'epoch': 3} {'type': 'loss', 'content': 0.059960901737213135, 'timestamp': '2025-09-10 02:58:54.402242', 'step': 18202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:54.456470', 'step': 18202, 'epoch': 3} {'type': 'loss', 'content': 0.11285582184791565, 'timestamp': '2025-09-10 02:58:54.458507', 'step': 18203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:54.511595', 'step': 18203, 'epoch': 3} {'type': 'loss', 'content': 0.11507612466812134, 'timestamp': '2025-09-10 02:58:54.517711', 'step': 18204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:54.570839', 'step': 18204, 'epoch': 3} {'type': 'loss', 'content': 0.1703975349664688, 'timestamp': '2025-09-10 02:58:54.572863', 'step': 18205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:54.626761', 'step': 18205, 'epoch': 3} {'type': 'loss', 'content': 0.08509982377290726, 'timestamp': '2025-09-10 02:58:54.628974', 'step': 18206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:54.682870', 'step': 18206, 'epoch': 3} {'type': 'loss', 'content': 0.11016040295362473, 'timestamp': '2025-09-10 02:58:54.685083', 'step': 18207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:54.738644', 'step': 18207, 'epoch': 3} {'type': 'loss', 'content': 0.12579959630966187, 'timestamp': '2025-09-10 02:58:54.744501', 'step': 18208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:58:54.798007', 'step': 18208, 'epoch': 3} {'type': 'loss', 'content': 0.09921818226575851, 'timestamp': '2025-09-10 02:58:54.800264', 'step': 18209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:54.853756', 'step': 18209, 'epoch': 3} {'type': 'loss', 'content': 0.1810072958469391, 'timestamp': '2025-09-10 02:58:54.855812', 'step': 18210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:54.909196', 'step': 18210, 'epoch': 3} {'type': 'loss', 'content': 0.1189289391040802, 'timestamp': '2025-09-10 02:58:54.911436', 'step': 18211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:54.965547', 'step': 18211, 'epoch': 3} {'type': 'loss', 'content': 0.0067076729610562325, 'timestamp': '2025-09-10 02:58:54.971634', 'step': 18212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:55.026049', 'step': 18212, 'epoch': 3} {'type': 'loss', 'content': 0.1398642212152481, 'timestamp': '2025-09-10 02:58:55.028191', 'step': 18213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:55.102737', 'step': 18213, 'epoch': 3} {'type': 'loss', 'content': 0.13102710247039795, 'timestamp': '2025-09-10 02:58:55.104999', 'step': 18214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:55.176928', 'step': 18214, 'epoch': 3} {'type': 'loss', 'content': 0.056215450167655945, 'timestamp': '2025-09-10 02:58:55.179191', 'step': 18215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:58:55.267507', 'step': 18215, 'epoch': 3} {'type': 'loss', 'content': 0.11624076962471008, 'timestamp': '2025-09-10 02:58:55.273517', 'step': 18216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:55.332855', 'step': 18216, 'epoch': 3} {'type': 'loss', 'content': 0.07409515976905823, 'timestamp': '2025-09-10 02:58:55.335192', 'step': 18217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:55.424588', 'step': 18217, 'epoch': 3} {'type': 'loss', 'content': 0.0548960380256176, 'timestamp': '2025-09-10 02:58:55.427005', 'step': 18218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:55.499397', 'step': 18218, 'epoch': 3} {'type': 'loss', 'content': 0.056529153138399124, 'timestamp': '2025-09-10 02:58:55.501774', 'step': 18219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:55.576888', 'step': 18219, 'epoch': 3} {'type': 'loss', 'content': 0.11090891808271408, 'timestamp': '2025-09-10 02:58:55.584356', 'step': 18220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:55.645850', 'step': 18220, 'epoch': 3} {'type': 'loss', 'content': 0.08528467267751694, 'timestamp': '2025-09-10 02:58:55.648470', 'step': 18221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:55.709543', 'step': 18221, 'epoch': 3} {'type': 'loss', 'content': 0.12955069541931152, 'timestamp': '2025-09-10 02:58:55.711820', 'step': 18222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:55.768357', 'step': 18222, 'epoch': 3} {'type': 'loss', 'content': 0.10729341208934784, 'timestamp': '2025-09-10 02:58:55.770608', 'step': 18223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:55.825335', 'step': 18223, 'epoch': 3} {'type': 'loss', 'content': 0.05401065573096275, 'timestamp': '2025-09-10 02:58:55.831701', 'step': 18224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:55.885288', 'step': 18224, 'epoch': 3} {'type': 'loss', 'content': 0.10879556089639664, 'timestamp': '2025-09-10 02:58:55.887627', 'step': 18225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:55.941788', 'step': 18225, 'epoch': 3} {'type': 'loss', 'content': 0.05819959193468094, 'timestamp': '2025-09-10 02:58:55.943997', 'step': 18226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:56.000286', 'step': 18226, 'epoch': 3} {'type': 'loss', 'content': 0.08984730392694473, 'timestamp': '2025-09-10 02:58:56.002549', 'step': 18227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:56.057280', 'step': 18227, 'epoch': 3} {'type': 'loss', 'content': 0.12667962908744812, 'timestamp': '2025-09-10 02:58:56.063312', 'step': 18228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:56.116864', 'step': 18228, 'epoch': 3} {'type': 'loss', 'content': 0.10318265110254288, 'timestamp': '2025-09-10 02:58:56.118968', 'step': 18229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:56.172930', 'step': 18229, 'epoch': 3} {'type': 'loss', 'content': 0.056788794696331024, 'timestamp': '2025-09-10 02:58:56.175134', 'step': 18230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:56.229088', 'step': 18230, 'epoch': 3} {'type': 'loss', 'content': 0.12499494850635529, 'timestamp': '2025-09-10 02:58:56.231298', 'step': 18231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:56.285125', 'step': 18231, 'epoch': 3} {'type': 'loss', 'content': 0.06982322037220001, 'timestamp': '2025-09-10 02:58:56.290888', 'step': 18232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:56.344271', 'step': 18232, 'epoch': 3} {'type': 'loss', 'content': 0.07552602142095566, 'timestamp': '2025-09-10 02:58:56.346500', 'step': 18233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:56.400627', 'step': 18233, 'epoch': 3} {'type': 'loss', 'content': 0.07644959539175034, 'timestamp': '2025-09-10 02:58:56.402849', 'step': 18234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:56.457676', 'step': 18234, 'epoch': 3} {'type': 'loss', 'content': 0.0945262759923935, 'timestamp': '2025-09-10 02:58:56.461459', 'step': 18235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:56.518643', 'step': 18235, 'epoch': 3} {'type': 'loss', 'content': 0.10585077106952667, 'timestamp': '2025-09-10 02:58:56.524636', 'step': 18236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:56.577747', 'step': 18236, 'epoch': 3} {'type': 'loss', 'content': 0.051650140434503555, 'timestamp': '2025-09-10 02:58:56.579791', 'step': 18237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:56.632989', 'step': 18237, 'epoch': 3} {'type': 'loss', 'content': 0.11742448806762695, 'timestamp': '2025-09-10 02:58:56.635028', 'step': 18238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:56.688006', 'step': 18238, 'epoch': 3} {'type': 'loss', 'content': 0.13160032033920288, 'timestamp': '2025-09-10 02:58:56.690122', 'step': 18239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:56.743608', 'step': 18239, 'epoch': 3} {'type': 'loss', 'content': 0.05641373619437218, 'timestamp': '2025-09-10 02:58:56.749390', 'step': 18240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:56.801977', 'step': 18240, 'epoch': 3} {'type': 'loss', 'content': 0.10158678889274597, 'timestamp': '2025-09-10 02:58:56.804172', 'step': 18241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:56.857504', 'step': 18241, 'epoch': 3} {'type': 'loss', 'content': 0.04359535872936249, 'timestamp': '2025-09-10 02:58:56.859845', 'step': 18242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:58:56.913628', 'step': 18242, 'epoch': 3} {'type': 'loss', 'content': 0.015527383424341679, 'timestamp': '2025-09-10 02:58:56.916122', 'step': 18243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:56.969060', 'step': 18243, 'epoch': 3} {'type': 'loss', 'content': 0.05792682617902756, 'timestamp': '2025-09-10 02:58:56.974838', 'step': 18244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:57.026882', 'step': 18244, 'epoch': 3} {'type': 'loss', 'content': 0.15862543880939484, 'timestamp': '2025-09-10 02:58:57.029280', 'step': 18245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:57.084344', 'step': 18245, 'epoch': 3} {'type': 'loss', 'content': 0.07271945476531982, 'timestamp': '2025-09-10 02:58:57.086626', 'step': 18246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:57.140506', 'step': 18246, 'epoch': 3} {'type': 'loss', 'content': 0.14890015125274658, 'timestamp': '2025-09-10 02:58:57.142703', 'step': 18247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:58:57.196692', 'step': 18247, 'epoch': 3} {'type': 'loss', 'content': 0.11351948231458664, 'timestamp': '2025-09-10 02:58:57.202453', 'step': 18248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:57.260301', 'step': 18248, 'epoch': 3} {'type': 'loss', 'content': 0.12983661890029907, 'timestamp': '2025-09-10 02:58:57.262718', 'step': 18249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:57.316731', 'step': 18249, 'epoch': 3} {'type': 'loss', 'content': 0.08903476595878601, 'timestamp': '2025-09-10 02:58:57.319110', 'step': 18250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:57.372635', 'step': 18250, 'epoch': 3} {'type': 'loss', 'content': 0.0838068500161171, 'timestamp': '2025-09-10 02:58:57.374960', 'step': 18251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:57.428312', 'step': 18251, 'epoch': 3} {'type': 'loss', 'content': 0.14032666385173798, 'timestamp': '2025-09-10 02:58:57.434265', 'step': 18252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:57.487912', 'step': 18252, 'epoch': 3} {'type': 'loss', 'content': 0.05736253783106804, 'timestamp': '2025-09-10 02:58:57.490150', 'step': 18253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:57.543622', 'step': 18253, 'epoch': 3} {'type': 'loss', 'content': 0.12449881434440613, 'timestamp': '2025-09-10 02:58:57.545913', 'step': 18254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:58:57.599753', 'step': 18254, 'epoch': 3} {'type': 'loss', 'content': 0.04235469922423363, 'timestamp': '2025-09-10 02:58:57.602047', 'step': 18255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:57.655404', 'step': 18255, 'epoch': 3} {'type': 'loss', 'content': 0.12232881784439087, 'timestamp': '2025-09-10 02:58:57.661335', 'step': 18256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:57.714107', 'step': 18256, 'epoch': 3} {'type': 'loss', 'content': 0.03345223516225815, 'timestamp': '2025-09-10 02:58:57.716435', 'step': 18257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:57.770413', 'step': 18257, 'epoch': 3} {'type': 'loss', 'content': 0.06568685173988342, 'timestamp': '2025-09-10 02:58:57.772525', 'step': 18258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:57.826753', 'step': 18258, 'epoch': 3} {'type': 'loss', 'content': 0.16334138810634613, 'timestamp': '2025-09-10 02:58:57.829002', 'step': 18259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:58:57.898491', 'step': 18259, 'epoch': 3} {'type': 'loss', 'content': 0.06131314858794212, 'timestamp': '2025-09-10 02:58:57.911803', 'step': 18260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:57.966561', 'step': 18260, 'epoch': 3} {'type': 'loss', 'content': 0.10263526439666748, 'timestamp': '2025-09-10 02:58:57.968809', 'step': 18261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:58.024110', 'step': 18261, 'epoch': 3} {'type': 'loss', 'content': 0.1679815948009491, 'timestamp': '2025-09-10 02:58:58.026475', 'step': 18262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:58.080964', 'step': 18262, 'epoch': 3} {'type': 'loss', 'content': 0.14984476566314697, 'timestamp': '2025-09-10 02:58:58.083292', 'step': 18263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:58.137482', 'step': 18263, 'epoch': 3} {'type': 'loss', 'content': 0.15545469522476196, 'timestamp': '2025-09-10 02:58:58.143615', 'step': 18264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:58.197292', 'step': 18264, 'epoch': 3} {'type': 'loss', 'content': 0.08098383247852325, 'timestamp': '2025-09-10 02:58:58.199850', 'step': 18265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:58.259267', 'step': 18265, 'epoch': 3} {'type': 'loss', 'content': 0.10803167521953583, 'timestamp': '2025-09-10 02:58:58.261975', 'step': 18266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:58.316013', 'step': 18266, 'epoch': 3} {'type': 'loss', 'content': 0.07545085996389389, 'timestamp': '2025-09-10 02:58:58.318403', 'step': 18267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:58.373011', 'step': 18267, 'epoch': 3} {'type': 'loss', 'content': 0.050071943551301956, 'timestamp': '2025-09-10 02:58:58.379022', 'step': 18268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:58.431971', 'step': 18268, 'epoch': 3} {'type': 'loss', 'content': 0.1358875036239624, 'timestamp': '2025-09-10 02:58:58.434014', 'step': 18269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:58.487342', 'step': 18269, 'epoch': 3} {'type': 'loss', 'content': 0.07499939203262329, 'timestamp': '2025-09-10 02:58:58.489338', 'step': 18270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:58.543642', 'step': 18270, 'epoch': 3} {'type': 'loss', 'content': 0.07489623874425888, 'timestamp': '2025-09-10 02:58:58.545951', 'step': 18271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:58.599459', 'step': 18271, 'epoch': 3} {'type': 'loss', 'content': 0.08079656958580017, 'timestamp': '2025-09-10 02:58:58.605501', 'step': 18272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:58:58.658296', 'step': 18272, 'epoch': 3} {'type': 'loss', 'content': 0.06397683173418045, 'timestamp': '2025-09-10 02:58:58.660413', 'step': 18273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:58.713853', 'step': 18273, 'epoch': 3} {'type': 'loss', 'content': 0.08288516104221344, 'timestamp': '2025-09-10 02:58:58.716070', 'step': 18274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:58.769095', 'step': 18274, 'epoch': 3} {'type': 'loss', 'content': 0.1551332175731659, 'timestamp': '2025-09-10 02:58:58.771267', 'step': 18275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:58.826139', 'step': 18275, 'epoch': 3} {'type': 'loss', 'content': 0.11923740804195404, 'timestamp': '2025-09-10 02:58:58.832162', 'step': 18276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:58.885992', 'step': 18276, 'epoch': 3} {'type': 'loss', 'content': 0.09802903980016708, 'timestamp': '2025-09-10 02:58:58.888288', 'step': 18277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:58.943083', 'step': 18277, 'epoch': 3} {'type': 'loss', 'content': 0.04604235664010048, 'timestamp': '2025-09-10 02:58:58.945341', 'step': 18278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:58:58.999473', 'step': 18278, 'epoch': 3} {'type': 'loss', 'content': 0.065999835729599, 'timestamp': '2025-09-10 02:58:59.001856', 'step': 18279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:59.055976', 'step': 18279, 'epoch': 3} {'type': 'loss', 'content': 0.12265686690807343, 'timestamp': '2025-09-10 02:58:59.062049', 'step': 18280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:59.115488', 'step': 18280, 'epoch': 3} {'type': 'loss', 'content': 0.12426253408193588, 'timestamp': '2025-09-10 02:58:59.117647', 'step': 18281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:59.171569', 'step': 18281, 'epoch': 3} {'type': 'loss', 'content': 0.09209343045949936, 'timestamp': '2025-09-10 02:58:59.174007', 'step': 18282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:59.230616', 'step': 18282, 'epoch': 3} {'type': 'loss', 'content': 0.026976849883794785, 'timestamp': '2025-09-10 02:58:59.232926', 'step': 18283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:59.289703', 'step': 18283, 'epoch': 3} {'type': 'loss', 'content': 0.06999380141496658, 'timestamp': '2025-09-10 02:58:59.295313', 'step': 18284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:59.348279', 'step': 18284, 'epoch': 3} {'type': 'loss', 'content': 0.16422446072101593, 'timestamp': '2025-09-10 02:58:59.350348', 'step': 18285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:59.403773', 'step': 18285, 'epoch': 3} {'type': 'loss', 'content': 0.049922142177820206, 'timestamp': '2025-09-10 02:58:59.406017', 'step': 18286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:59.459660', 'step': 18286, 'epoch': 3} {'type': 'loss', 'content': 0.13295061886310577, 'timestamp': '2025-09-10 02:58:59.461821', 'step': 18287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:58:59.516808', 'step': 18287, 'epoch': 3} {'type': 'loss', 'content': 0.0636407807469368, 'timestamp': '2025-09-10 02:58:59.522782', 'step': 18288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:59.576818', 'step': 18288, 'epoch': 3} {'type': 'loss', 'content': 0.09767159074544907, 'timestamp': '2025-09-10 02:58:59.579136', 'step': 18289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:58:59.634115', 'step': 18289, 'epoch': 3} {'type': 'loss', 'content': 0.06939776241779327, 'timestamp': '2025-09-10 02:58:59.636410', 'step': 18290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:59.690607', 'step': 18290, 'epoch': 3} {'type': 'loss', 'content': 0.1257944256067276, 'timestamp': '2025-09-10 02:58:59.692985', 'step': 18291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:59.748092', 'step': 18291, 'epoch': 3} {'type': 'loss', 'content': 0.09277769178152084, 'timestamp': '2025-09-10 02:58:59.754346', 'step': 18292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:58:59.807502', 'step': 18292, 'epoch': 3} {'type': 'loss', 'content': 0.1054312065243721, 'timestamp': '2025-09-10 02:58:59.809880', 'step': 18293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:58:59.863498', 'step': 18293, 'epoch': 3} {'type': 'loss', 'content': 0.14123018085956573, 'timestamp': '2025-09-10 02:58:59.865696', 'step': 18294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:59.919114', 'step': 18294, 'epoch': 3} {'type': 'loss', 'content': 0.04768321290612221, 'timestamp': '2025-09-10 02:58:59.921127', 'step': 18295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:58:59.974697', 'step': 18295, 'epoch': 3} {'type': 'loss', 'content': 0.18015174567699432, 'timestamp': '2025-09-10 02:58:59.980418', 'step': 18296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:00.033485', 'step': 18296, 'epoch': 3} {'type': 'loss', 'content': 0.07759026437997818, 'timestamp': '2025-09-10 02:59:00.035723', 'step': 18297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:00.090235', 'step': 18297, 'epoch': 3} {'type': 'loss', 'content': 0.11052299290895462, 'timestamp': '2025-09-10 02:59:00.092555', 'step': 18298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:00.147337', 'step': 18298, 'epoch': 3} {'type': 'loss', 'content': 0.07555055618286133, 'timestamp': '2025-09-10 02:59:00.150540', 'step': 18299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:00.204614', 'step': 18299, 'epoch': 3} {'type': 'loss', 'content': 0.2069704532623291, 'timestamp': '2025-09-10 02:59:00.210624', 'step': 18300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:00.264665', 'step': 18300, 'epoch': 3} {'type': 'loss', 'content': 0.10294713079929352, 'timestamp': '2025-09-10 02:59:00.266909', 'step': 18301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:00.322439', 'step': 18301, 'epoch': 3} {'type': 'loss', 'content': 0.14392468333244324, 'timestamp': '2025-09-10 02:59:00.324680', 'step': 18302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:00.380973', 'step': 18302, 'epoch': 3} {'type': 'loss', 'content': 0.1619013398885727, 'timestamp': '2025-09-10 02:59:00.383290', 'step': 18303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:00.436994', 'step': 18303, 'epoch': 3} {'type': 'loss', 'content': 0.0457405149936676, 'timestamp': '2025-09-10 02:59:00.442955', 'step': 18304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:00.496354', 'step': 18304, 'epoch': 3} {'type': 'loss', 'content': 0.08566108345985413, 'timestamp': '2025-09-10 02:59:00.498764', 'step': 18305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:00.552341', 'step': 18305, 'epoch': 3} {'type': 'loss', 'content': 0.08858603239059448, 'timestamp': '2025-09-10 02:59:00.554685', 'step': 18306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:00.608452', 'step': 18306, 'epoch': 3} {'type': 'loss', 'content': 0.08525189757347107, 'timestamp': '2025-09-10 02:59:00.610734', 'step': 18307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:00.665293', 'step': 18307, 'epoch': 3} {'type': 'loss', 'content': 0.06249956414103508, 'timestamp': '2025-09-10 02:59:00.671364', 'step': 18308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:00.725024', 'step': 18308, 'epoch': 3} {'type': 'loss', 'content': 0.09980344027280807, 'timestamp': '2025-09-10 02:59:00.727323', 'step': 18309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:00.781503', 'step': 18309, 'epoch': 3} {'type': 'loss', 'content': 0.14635242521762848, 'timestamp': '2025-09-10 02:59:00.783759', 'step': 18310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:00.837836', 'step': 18310, 'epoch': 3} {'type': 'loss', 'content': 0.12507592141628265, 'timestamp': '2025-09-10 02:59:00.840056', 'step': 18311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:00.893796', 'step': 18311, 'epoch': 3} {'type': 'loss', 'content': 0.12227237969636917, 'timestamp': '2025-09-10 02:59:00.899766', 'step': 18312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:00.953012', 'step': 18312, 'epoch': 3} {'type': 'loss', 'content': 0.09948382526636124, 'timestamp': '2025-09-10 02:59:00.955285', 'step': 18313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:01.008801', 'step': 18313, 'epoch': 3} {'type': 'loss', 'content': 0.06463480740785599, 'timestamp': '2025-09-10 02:59:01.011104', 'step': 18314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:01.065214', 'step': 18314, 'epoch': 3} {'type': 'loss', 'content': 0.05842594429850578, 'timestamp': '2025-09-10 02:59:01.067485', 'step': 18315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:01.121498', 'step': 18315, 'epoch': 3} {'type': 'loss', 'content': 0.09092982113361359, 'timestamp': '2025-09-10 02:59:01.127265', 'step': 18316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:01.181249', 'step': 18316, 'epoch': 3} {'type': 'loss', 'content': 0.1271161586046219, 'timestamp': '2025-09-10 02:59:01.184125', 'step': 18317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:01.240794', 'step': 18317, 'epoch': 3} {'type': 'loss', 'content': 0.11042014509439468, 'timestamp': '2025-09-10 02:59:01.243033', 'step': 18318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:01.297167', 'step': 18318, 'epoch': 3} {'type': 'loss', 'content': 0.16456197202205658, 'timestamp': '2025-09-10 02:59:01.299437', 'step': 18319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:01.353088', 'step': 18319, 'epoch': 3} {'type': 'loss', 'content': 0.05625603720545769, 'timestamp': '2025-09-10 02:59:01.359097', 'step': 18320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:01.412441', 'step': 18320, 'epoch': 3} {'type': 'loss', 'content': 0.09631757438182831, 'timestamp': '2025-09-10 02:59:01.414756', 'step': 18321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:01.468477', 'step': 18321, 'epoch': 3} {'type': 'loss', 'content': 0.12247661501169205, 'timestamp': '2025-09-10 02:59:01.470789', 'step': 18322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:01.524675', 'step': 18322, 'epoch': 3} {'type': 'loss', 'content': 0.1365525871515274, 'timestamp': '2025-09-10 02:59:01.526839', 'step': 18323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:01.581346', 'step': 18323, 'epoch': 3} {'type': 'loss', 'content': 0.11375631392002106, 'timestamp': '2025-09-10 02:59:01.586944', 'step': 18324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:01.639396', 'step': 18324, 'epoch': 3} {'type': 'loss', 'content': 0.05918233096599579, 'timestamp': '2025-09-10 02:59:01.641628', 'step': 18325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:01.695734', 'step': 18325, 'epoch': 3} {'type': 'loss', 'content': 0.09522415697574615, 'timestamp': '2025-09-10 02:59:01.698194', 'step': 18326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:01.752741', 'step': 18326, 'epoch': 3} {'type': 'loss', 'content': 0.10072654485702515, 'timestamp': '2025-09-10 02:59:01.755007', 'step': 18327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:01.808354', 'step': 18327, 'epoch': 3} {'type': 'loss', 'content': 0.13387352228164673, 'timestamp': '2025-09-10 02:59:01.814422', 'step': 18328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:01.867931', 'step': 18328, 'epoch': 3} {'type': 'loss', 'content': 0.05165989324450493, 'timestamp': '2025-09-10 02:59:01.870258', 'step': 18329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:01.928933', 'step': 18329, 'epoch': 3} {'type': 'loss', 'content': 0.05494897440075874, 'timestamp': '2025-09-10 02:59:01.931140', 'step': 18330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:01.985312', 'step': 18330, 'epoch': 3} {'type': 'loss', 'content': 0.043592046946287155, 'timestamp': '2025-09-10 02:59:01.987611', 'step': 18331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:02.042338', 'step': 18331, 'epoch': 3} {'type': 'loss', 'content': 0.17035011947155, 'timestamp': '2025-09-10 02:59:02.048232', 'step': 18332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:02.101975', 'step': 18332, 'epoch': 3} {'type': 'loss', 'content': 0.12193424999713898, 'timestamp': '2025-09-10 02:59:02.104176', 'step': 18333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:02.158129', 'step': 18333, 'epoch': 3} {'type': 'loss', 'content': 0.03588452562689781, 'timestamp': '2025-09-10 02:59:02.160400', 'step': 18334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:02.214071', 'step': 18334, 'epoch': 3} {'type': 'loss', 'content': 0.08886387199163437, 'timestamp': '2025-09-10 02:59:02.216425', 'step': 18335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:02.280364', 'step': 18335, 'epoch': 3} {'type': 'loss', 'content': 0.09890048205852509, 'timestamp': '2025-09-10 02:59:02.286853', 'step': 18336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:02.341523', 'step': 18336, 'epoch': 3} {'type': 'loss', 'content': 0.03388961777091026, 'timestamp': '2025-09-10 02:59:02.343661', 'step': 18337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:02.397347', 'step': 18337, 'epoch': 3} {'type': 'loss', 'content': 0.08861177414655685, 'timestamp': '2025-09-10 02:59:02.399654', 'step': 18338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:02.453873', 'step': 18338, 'epoch': 3} {'type': 'loss', 'content': 0.07452824711799622, 'timestamp': '2025-09-10 02:59:02.456033', 'step': 18339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:02.509592', 'step': 18339, 'epoch': 3} {'type': 'loss', 'content': 0.04498640447854996, 'timestamp': '2025-09-10 02:59:02.515597', 'step': 18340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:02.568657', 'step': 18340, 'epoch': 3} {'type': 'loss', 'content': 0.10759072005748749, 'timestamp': '2025-09-10 02:59:02.570833', 'step': 18341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:02.624164', 'step': 18341, 'epoch': 3} {'type': 'loss', 'content': 0.0670282393693924, 'timestamp': '2025-09-10 02:59:02.626514', 'step': 18342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:02.681117', 'step': 18342, 'epoch': 3} {'type': 'loss', 'content': 0.12050390243530273, 'timestamp': '2025-09-10 02:59:02.683423', 'step': 18343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:02.736635', 'step': 18343, 'epoch': 3} {'type': 'loss', 'content': 0.07601486891508102, 'timestamp': '2025-09-10 02:59:02.742565', 'step': 18344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:02.804102', 'step': 18344, 'epoch': 3} {'type': 'loss', 'content': 0.07640229165554047, 'timestamp': '2025-09-10 02:59:02.806427', 'step': 18345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:02.864330', 'step': 18345, 'epoch': 3} {'type': 'loss', 'content': 0.06152302026748657, 'timestamp': '2025-09-10 02:59:02.866641', 'step': 18346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:02.924700', 'step': 18346, 'epoch': 3} {'type': 'loss', 'content': 0.17179647088050842, 'timestamp': '2025-09-10 02:59:02.926840', 'step': 18347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:02.980152', 'step': 18347, 'epoch': 3} {'type': 'loss', 'content': 0.19218195974826813, 'timestamp': '2025-09-10 02:59:02.986194', 'step': 18348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:03.039282', 'step': 18348, 'epoch': 3} {'type': 'loss', 'content': 0.04783912003040314, 'timestamp': '2025-09-10 02:59:03.041635', 'step': 18349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:03.096155', 'step': 18349, 'epoch': 3} {'type': 'loss', 'content': 0.15561901032924652, 'timestamp': '2025-09-10 02:59:03.098444', 'step': 18350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:03.162904', 'step': 18350, 'epoch': 3} {'type': 'loss', 'content': 0.10617666691541672, 'timestamp': '2025-09-10 02:59:03.165115', 'step': 18351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:03.222649', 'step': 18351, 'epoch': 3} {'type': 'loss', 'content': 0.08645033091306686, 'timestamp': '2025-09-10 02:59:03.228711', 'step': 18352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:03.281028', 'step': 18352, 'epoch': 3} {'type': 'loss', 'content': 0.037964340299367905, 'timestamp': '2025-09-10 02:59:03.283304', 'step': 18353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:03.337749', 'step': 18353, 'epoch': 3} {'type': 'loss', 'content': 0.08635298907756805, 'timestamp': '2025-09-10 02:59:03.339927', 'step': 18354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:03.396871', 'step': 18354, 'epoch': 3} {'type': 'loss', 'content': 0.05374479666352272, 'timestamp': '2025-09-10 02:59:03.400933', 'step': 18355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:03.453574', 'step': 18355, 'epoch': 3} {'type': 'loss', 'content': 0.2206783890724182, 'timestamp': '2025-09-10 02:59:03.459663', 'step': 18356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:03.512428', 'step': 18356, 'epoch': 3} {'type': 'loss', 'content': 0.07611774653196335, 'timestamp': '2025-09-10 02:59:03.514652', 'step': 18357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:03.568302', 'step': 18357, 'epoch': 3} {'type': 'loss', 'content': 0.1342359185218811, 'timestamp': '2025-09-10 02:59:03.570568', 'step': 18358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:03.623934', 'step': 18358, 'epoch': 3} {'type': 'loss', 'content': 0.046844907104969025, 'timestamp': '2025-09-10 02:59:03.625940', 'step': 18359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:03.678611', 'step': 18359, 'epoch': 3} {'type': 'loss', 'content': 0.08433794230222702, 'timestamp': '2025-09-10 02:59:03.684306', 'step': 18360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:03.737016', 'step': 18360, 'epoch': 3} {'type': 'loss', 'content': 0.08635267615318298, 'timestamp': '2025-09-10 02:59:03.739341', 'step': 18361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:03.792429', 'step': 18361, 'epoch': 3} {'type': 'loss', 'content': 0.13365383446216583, 'timestamp': '2025-09-10 02:59:03.794991', 'step': 18362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:03.848266', 'step': 18362, 'epoch': 3} {'type': 'loss', 'content': 0.09979912638664246, 'timestamp': '2025-09-10 02:59:03.850646', 'step': 18363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:03.905204', 'step': 18363, 'epoch': 3} {'type': 'loss', 'content': 0.06097980961203575, 'timestamp': '2025-09-10 02:59:03.911387', 'step': 18364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:03.964326', 'step': 18364, 'epoch': 3} {'type': 'loss', 'content': 0.09470948576927185, 'timestamp': '2025-09-10 02:59:03.966510', 'step': 18365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:04.019313', 'step': 18365, 'epoch': 3} {'type': 'loss', 'content': 0.10543463379144669, 'timestamp': '2025-09-10 02:59:04.021675', 'step': 18366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:04.074941', 'step': 18366, 'epoch': 3} {'type': 'loss', 'content': 0.05680333077907562, 'timestamp': '2025-09-10 02:59:04.077139', 'step': 18367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:04.130613', 'step': 18367, 'epoch': 3} {'type': 'loss', 'content': 0.14601488411426544, 'timestamp': '2025-09-10 02:59:04.136568', 'step': 18368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:04.189619', 'step': 18368, 'epoch': 3} {'type': 'loss', 'content': 0.11569032818078995, 'timestamp': '2025-09-10 02:59:04.191805', 'step': 18369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:04.245644', 'step': 18369, 'epoch': 3} {'type': 'loss', 'content': 0.051919251680374146, 'timestamp': '2025-09-10 02:59:04.247782', 'step': 18370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:04.301477', 'step': 18370, 'epoch': 3} {'type': 'loss', 'content': 0.17366954684257507, 'timestamp': '2025-09-10 02:59:04.303554', 'step': 18371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:04.356728', 'step': 18371, 'epoch': 3} {'type': 'loss', 'content': 0.16244155168533325, 'timestamp': '2025-09-10 02:59:04.362671', 'step': 18372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:04.419446', 'step': 18372, 'epoch': 3} {'type': 'loss', 'content': 0.07547560334205627, 'timestamp': '2025-09-10 02:59:04.421772', 'step': 18373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:04.476320', 'step': 18373, 'epoch': 3} {'type': 'loss', 'content': 0.10725127905607224, 'timestamp': '2025-09-10 02:59:04.478734', 'step': 18374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:04.533361', 'step': 18374, 'epoch': 3} {'type': 'loss', 'content': 0.0687655434012413, 'timestamp': '2025-09-10 02:59:04.535816', 'step': 18375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:04.589604', 'step': 18375, 'epoch': 3} {'type': 'loss', 'content': 0.1321898251771927, 'timestamp': '2025-09-10 02:59:04.595602', 'step': 18376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:04.648936', 'step': 18376, 'epoch': 3} {'type': 'loss', 'content': 0.0203902255743742, 'timestamp': '2025-09-10 02:59:04.651131', 'step': 18377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:04.704502', 'step': 18377, 'epoch': 3} {'type': 'loss', 'content': 0.04886171966791153, 'timestamp': '2025-09-10 02:59:04.706842', 'step': 18378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:04.760305', 'step': 18378, 'epoch': 3} {'type': 'loss', 'content': 0.10513053089380264, 'timestamp': '2025-09-10 02:59:04.762429', 'step': 18379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:04.815243', 'step': 18379, 'epoch': 3} {'type': 'loss', 'content': 0.16800850629806519, 'timestamp': '2025-09-10 02:59:04.820842', 'step': 18380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:04.874410', 'step': 18380, 'epoch': 3} {'type': 'loss', 'content': 0.07830150425434113, 'timestamp': '2025-09-10 02:59:04.876707', 'step': 18381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:04.930022', 'step': 18381, 'epoch': 3} {'type': 'loss', 'content': 0.04089531674981117, 'timestamp': '2025-09-10 02:59:04.932190', 'step': 18382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:04.985723', 'step': 18382, 'epoch': 3} {'type': 'loss', 'content': 0.08275085687637329, 'timestamp': '2025-09-10 02:59:04.987860', 'step': 18383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:05.041494', 'step': 18383, 'epoch': 3} {'type': 'loss', 'content': 0.09057765454053879, 'timestamp': '2025-09-10 02:59:05.047489', 'step': 18384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:05.100294', 'step': 18384, 'epoch': 3} {'type': 'loss', 'content': 0.10498564690351486, 'timestamp': '2025-09-10 02:59:05.102375', 'step': 18385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:05.155076', 'step': 18385, 'epoch': 3} {'type': 'loss', 'content': 0.06589219719171524, 'timestamp': '2025-09-10 02:59:05.157684', 'step': 18386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:05.212335', 'step': 18386, 'epoch': 3} {'type': 'loss', 'content': 0.15602488815784454, 'timestamp': '2025-09-10 02:59:05.214545', 'step': 18387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:05.268265', 'step': 18387, 'epoch': 3} {'type': 'loss', 'content': 0.05887547880411148, 'timestamp': '2025-09-10 02:59:05.274419', 'step': 18388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:05.328162', 'step': 18388, 'epoch': 3} {'type': 'loss', 'content': 0.0918874740600586, 'timestamp': '2025-09-10 02:59:05.330379', 'step': 18389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:05.385548', 'step': 18389, 'epoch': 3} {'type': 'loss', 'content': 0.07775717228651047, 'timestamp': '2025-09-10 02:59:05.387812', 'step': 18390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:05.441770', 'step': 18390, 'epoch': 3} {'type': 'loss', 'content': 0.17000806331634521, 'timestamp': '2025-09-10 02:59:05.444193', 'step': 18391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:05.498918', 'step': 18391, 'epoch': 3} {'type': 'loss', 'content': 0.06272413581609726, 'timestamp': '2025-09-10 02:59:05.505167', 'step': 18392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:05.558151', 'step': 18392, 'epoch': 3} {'type': 'loss', 'content': 0.06488605588674545, 'timestamp': '2025-09-10 02:59:05.560542', 'step': 18393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:05.613815', 'step': 18393, 'epoch': 3} {'type': 'loss', 'content': 0.11015989631414413, 'timestamp': '2025-09-10 02:59:05.616078', 'step': 18394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:05.669379', 'step': 18394, 'epoch': 3} {'type': 'loss', 'content': 0.10936693847179413, 'timestamp': '2025-09-10 02:59:05.671444', 'step': 18395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:05.724550', 'step': 18395, 'epoch': 3} {'type': 'loss', 'content': 0.240556538105011, 'timestamp': '2025-09-10 02:59:05.730447', 'step': 18396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:05.782875', 'step': 18396, 'epoch': 3} {'type': 'loss', 'content': 0.05753389745950699, 'timestamp': '2025-09-10 02:59:05.785091', 'step': 18397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:05.838400', 'step': 18397, 'epoch': 3} {'type': 'loss', 'content': 0.09995042532682419, 'timestamp': '2025-09-10 02:59:05.840705', 'step': 18398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:05.894359', 'step': 18398, 'epoch': 3} {'type': 'loss', 'content': 0.10342759639024734, 'timestamp': '2025-09-10 02:59:05.896659', 'step': 18399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:05.949326', 'step': 18399, 'epoch': 3} {'type': 'loss', 'content': 0.10198887437582016, 'timestamp': '2025-09-10 02:59:05.955348', 'step': 18400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:06.008898', 'step': 18400, 'epoch': 3} {'type': 'loss', 'content': 0.1430523544549942, 'timestamp': '2025-09-10 02:59:06.011180', 'step': 18401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:06.064780', 'step': 18401, 'epoch': 3} {'type': 'loss', 'content': 0.16242484748363495, 'timestamp': '2025-09-10 02:59:06.066916', 'step': 18402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:06.120697', 'step': 18402, 'epoch': 3} {'type': 'loss', 'content': 0.14407646656036377, 'timestamp': '2025-09-10 02:59:06.122605', 'step': 18403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:06.177263', 'step': 18403, 'epoch': 3} {'type': 'loss', 'content': 0.09451653808355331, 'timestamp': '2025-09-10 02:59:06.183294', 'step': 18404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:06.237766', 'step': 18404, 'epoch': 3} {'type': 'loss', 'content': 0.08290650695562363, 'timestamp': '2025-09-10 02:59:06.239944', 'step': 18405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:06.298614', 'step': 18405, 'epoch': 3} {'type': 'loss', 'content': 0.04895841330289841, 'timestamp': '2025-09-10 02:59:06.300812', 'step': 18406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:06.354143', 'step': 18406, 'epoch': 3} {'type': 'loss', 'content': 0.09396088123321533, 'timestamp': '2025-09-10 02:59:06.356319', 'step': 18407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:06.411086', 'step': 18407, 'epoch': 3} {'type': 'loss', 'content': 0.0580952987074852, 'timestamp': '2025-09-10 02:59:06.416972', 'step': 18408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:06.487578', 'step': 18408, 'epoch': 3} {'type': 'loss', 'content': 0.08599160611629486, 'timestamp': '2025-09-10 02:59:06.489897', 'step': 18409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:06.552349', 'step': 18409, 'epoch': 3} {'type': 'loss', 'content': 0.06744394451379776, 'timestamp': '2025-09-10 02:59:06.554640', 'step': 18410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:06.612759', 'step': 18410, 'epoch': 3} {'type': 'loss', 'content': 0.06609532982110977, 'timestamp': '2025-09-10 02:59:06.616195', 'step': 18411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:06.674208', 'step': 18411, 'epoch': 3} {'type': 'loss', 'content': 0.0980205163359642, 'timestamp': '2025-09-10 02:59:06.680383', 'step': 18412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:06.738414', 'step': 18412, 'epoch': 3} {'type': 'loss', 'content': 0.023766955360770226, 'timestamp': '2025-09-10 02:59:06.740562', 'step': 18413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:06.795430', 'step': 18413, 'epoch': 3} {'type': 'loss', 'content': 0.152604341506958, 'timestamp': '2025-09-10 02:59:06.797831', 'step': 18414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:06.852088', 'step': 18414, 'epoch': 3} {'type': 'loss', 'content': 0.09222735464572906, 'timestamp': '2025-09-10 02:59:06.854305', 'step': 18415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:06.910472', 'step': 18415, 'epoch': 3} {'type': 'loss', 'content': 0.18941597640514374, 'timestamp': '2025-09-10 02:59:06.916519', 'step': 18416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:59:06.976659', 'step': 18416, 'epoch': 3} {'type': 'loss', 'content': 0.05899326875805855, 'timestamp': '2025-09-10 02:59:06.979186', 'step': 18417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:07.033577', 'step': 18417, 'epoch': 3} {'type': 'loss', 'content': 0.1203155517578125, 'timestamp': '2025-09-10 02:59:07.035740', 'step': 18418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:07.089983', 'step': 18418, 'epoch': 3} {'type': 'loss', 'content': 0.1263481080532074, 'timestamp': '2025-09-10 02:59:07.092330', 'step': 18419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:07.147375', 'step': 18419, 'epoch': 3} {'type': 'loss', 'content': 0.08761949837207794, 'timestamp': '2025-09-10 02:59:07.153725', 'step': 18420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:07.210515', 'step': 18420, 'epoch': 3} {'type': 'loss', 'content': 0.13225063681602478, 'timestamp': '2025-09-10 02:59:07.212802', 'step': 18421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:07.270117', 'step': 18421, 'epoch': 3} {'type': 'loss', 'content': 0.10090164095163345, 'timestamp': '2025-09-10 02:59:07.272552', 'step': 18422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:07.326687', 'step': 18422, 'epoch': 3} {'type': 'loss', 'content': 0.05716641619801521, 'timestamp': '2025-09-10 02:59:07.328991', 'step': 18423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:07.382501', 'step': 18423, 'epoch': 3} {'type': 'loss', 'content': 0.1192394569516182, 'timestamp': '2025-09-10 02:59:07.388688', 'step': 18424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:07.441911', 'step': 18424, 'epoch': 3} {'type': 'loss', 'content': 0.06741196662187576, 'timestamp': '2025-09-10 02:59:07.446861', 'step': 18425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:07.502514', 'step': 18425, 'epoch': 3} {'type': 'loss', 'content': 0.08919207751750946, 'timestamp': '2025-09-10 02:59:07.504770', 'step': 18426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:07.558522', 'step': 18426, 'epoch': 3} {'type': 'loss', 'content': 0.08461460471153259, 'timestamp': '2025-09-10 02:59:07.562964', 'step': 18427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:07.618286', 'step': 18427, 'epoch': 3} {'type': 'loss', 'content': 0.07745926827192307, 'timestamp': '2025-09-10 02:59:07.624462', 'step': 18428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:07.677433', 'step': 18428, 'epoch': 3} {'type': 'loss', 'content': 0.10132014751434326, 'timestamp': '2025-09-10 02:59:07.679700', 'step': 18429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:07.734917', 'step': 18429, 'epoch': 3} {'type': 'loss', 'content': 0.10141308605670929, 'timestamp': '2025-09-10 02:59:07.737192', 'step': 18430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:07.790714', 'step': 18430, 'epoch': 3} {'type': 'loss', 'content': 0.13297800719738007, 'timestamp': '2025-09-10 02:59:07.792996', 'step': 18431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:07.847127', 'step': 18431, 'epoch': 3} {'type': 'loss', 'content': 0.09963629394769669, 'timestamp': '2025-09-10 02:59:07.853309', 'step': 18432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:07.906068', 'step': 18432, 'epoch': 3} {'type': 'loss', 'content': 0.03640790283679962, 'timestamp': '2025-09-10 02:59:07.908183', 'step': 18433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:07.964275', 'step': 18433, 'epoch': 3} {'type': 'loss', 'content': 0.08515975624322891, 'timestamp': '2025-09-10 02:59:07.966495', 'step': 18434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:08.022186', 'step': 18434, 'epoch': 3} {'type': 'loss', 'content': 0.18040840327739716, 'timestamp': '2025-09-10 02:59:08.024502', 'step': 18435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:08.079137', 'step': 18435, 'epoch': 3} {'type': 'loss', 'content': 0.09207337349653244, 'timestamp': '2025-09-10 02:59:08.085075', 'step': 18436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:08.141160', 'step': 18436, 'epoch': 3} {'type': 'loss', 'content': 0.09543225169181824, 'timestamp': '2025-09-10 02:59:08.143334', 'step': 18437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:08.200672', 'step': 18437, 'epoch': 3} {'type': 'loss', 'content': 0.12657047808170319, 'timestamp': '2025-09-10 02:59:08.202941', 'step': 18438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:08.258631', 'step': 18438, 'epoch': 3} {'type': 'loss', 'content': 0.060614556074142456, 'timestamp': '2025-09-10 02:59:08.260707', 'step': 18439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:08.324705', 'step': 18439, 'epoch': 3} {'type': 'loss', 'content': 0.1597755402326584, 'timestamp': '2025-09-10 02:59:08.330742', 'step': 18440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:08.384327', 'step': 18440, 'epoch': 3} {'type': 'loss', 'content': 0.06730581074953079, 'timestamp': '2025-09-10 02:59:08.386476', 'step': 18441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:08.439960', 'step': 18441, 'epoch': 3} {'type': 'loss', 'content': 0.08811663836240768, 'timestamp': '2025-09-10 02:59:08.442190', 'step': 18442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:08.495821', 'step': 18442, 'epoch': 3} {'type': 'loss', 'content': 0.15073342621326447, 'timestamp': '2025-09-10 02:59:08.497994', 'step': 18443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:08.553024', 'step': 18443, 'epoch': 3} {'type': 'loss', 'content': 0.05468782037496567, 'timestamp': '2025-09-10 02:59:08.558698', 'step': 18444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:59:08.624296', 'step': 18444, 'epoch': 3} {'type': 'loss', 'content': 0.04777248948812485, 'timestamp': '2025-09-10 02:59:08.629004', 'step': 18445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:08.696218', 'step': 18445, 'epoch': 3} {'type': 'loss', 'content': 0.07595886290073395, 'timestamp': '2025-09-10 02:59:08.698623', 'step': 18446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:08.753323', 'step': 18446, 'epoch': 3} {'type': 'loss', 'content': 0.15803304314613342, 'timestamp': '2025-09-10 02:59:08.756357', 'step': 18447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:08.810992', 'step': 18447, 'epoch': 3} {'type': 'loss', 'content': 0.045884378254413605, 'timestamp': '2025-09-10 02:59:08.816785', 'step': 18448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:08.870652', 'step': 18448, 'epoch': 3} {'type': 'loss', 'content': 0.08302004635334015, 'timestamp': '2025-09-10 02:59:08.874390', 'step': 18449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:08.931827', 'step': 18449, 'epoch': 3} {'type': 'loss', 'content': 0.07123883813619614, 'timestamp': '2025-09-10 02:59:08.936078', 'step': 18450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:08.994027', 'step': 18450, 'epoch': 3} {'type': 'loss', 'content': 0.05705365166068077, 'timestamp': '2025-09-10 02:59:09.001481', 'step': 18451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:09.059723', 'step': 18451, 'epoch': 3} {'type': 'loss', 'content': 0.07850226759910583, 'timestamp': '2025-09-10 02:59:09.070160', 'step': 18452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:09.123533', 'step': 18452, 'epoch': 3} {'type': 'loss', 'content': 0.09229500591754913, 'timestamp': '2025-09-10 02:59:09.126728', 'step': 18453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:09.181900', 'step': 18453, 'epoch': 3} {'type': 'loss', 'content': 0.11080299317836761, 'timestamp': '2025-09-10 02:59:09.185702', 'step': 18454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:09.245701', 'step': 18454, 'epoch': 3} {'type': 'loss', 'content': 0.08479961007833481, 'timestamp': '2025-09-10 02:59:09.250154', 'step': 18455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:09.312507', 'step': 18455, 'epoch': 3} {'type': 'loss', 'content': 0.08329364657402039, 'timestamp': '2025-09-10 02:59:09.322665', 'step': 18456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:09.383675', 'step': 18456, 'epoch': 3} {'type': 'loss', 'content': 0.11021358519792557, 'timestamp': '2025-09-10 02:59:09.385853', 'step': 18457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:09.440602', 'step': 18457, 'epoch': 3} {'type': 'loss', 'content': 0.025115054100751877, 'timestamp': '2025-09-10 02:59:09.442621', 'step': 18458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:09.502241', 'step': 18458, 'epoch': 3} {'type': 'loss', 'content': 0.15200546383857727, 'timestamp': '2025-09-10 02:59:09.505632', 'step': 18459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:09.564971', 'step': 18459, 'epoch': 3} {'type': 'loss', 'content': 0.14685136079788208, 'timestamp': '2025-09-10 02:59:09.570952', 'step': 18460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:09.627177', 'step': 18460, 'epoch': 3} {'type': 'loss', 'content': 0.0907522514462471, 'timestamp': '2025-09-10 02:59:09.629625', 'step': 18461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:09.689203', 'step': 18461, 'epoch': 3} {'type': 'loss', 'content': 0.15364214777946472, 'timestamp': '2025-09-10 02:59:09.691349', 'step': 18462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:09.747952', 'step': 18462, 'epoch': 3} {'type': 'loss', 'content': 0.09982322156429291, 'timestamp': '2025-09-10 02:59:09.750022', 'step': 18463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:09.810622', 'step': 18463, 'epoch': 3} {'type': 'loss', 'content': 0.09246591478586197, 'timestamp': '2025-09-10 02:59:09.816776', 'step': 18464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:09.870279', 'step': 18464, 'epoch': 3} {'type': 'loss', 'content': 0.07375524193048477, 'timestamp': '2025-09-10 02:59:09.872794', 'step': 18465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:09.927087', 'step': 18465, 'epoch': 3} {'type': 'loss', 'content': 0.17612698674201965, 'timestamp': '2025-09-10 02:59:09.929308', 'step': 18466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:59:09.985053', 'step': 18466, 'epoch': 3} {'type': 'loss', 'content': 0.13656985759735107, 'timestamp': '2025-09-10 02:59:09.987526', 'step': 18467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:10.042185', 'step': 18467, 'epoch': 3} {'type': 'loss', 'content': 0.04729066416621208, 'timestamp': '2025-09-10 02:59:10.051094', 'step': 18468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:10.103722', 'step': 18468, 'epoch': 3} {'type': 'loss', 'content': 0.0322248600423336, 'timestamp': '2025-09-10 02:59:10.105668', 'step': 18469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:10.159683', 'step': 18469, 'epoch': 3} {'type': 'loss', 'content': 0.07881773263216019, 'timestamp': '2025-09-10 02:59:10.161756', 'step': 18470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:10.215955', 'step': 18470, 'epoch': 3} {'type': 'loss', 'content': 0.08387285470962524, 'timestamp': '2025-09-10 02:59:10.218003', 'step': 18471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:10.272209', 'step': 18471, 'epoch': 3} {'type': 'loss', 'content': 0.0586233027279377, 'timestamp': '2025-09-10 02:59:10.278603', 'step': 18472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:10.332026', 'step': 18472, 'epoch': 3} {'type': 'loss', 'content': 0.06283752620220184, 'timestamp': '2025-09-10 02:59:10.334348', 'step': 18473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:10.388614', 'step': 18473, 'epoch': 3} {'type': 'loss', 'content': 0.08389008045196533, 'timestamp': '2025-09-10 02:59:10.390656', 'step': 18474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:10.444271', 'step': 18474, 'epoch': 3} {'type': 'loss', 'content': 0.06791884452104568, 'timestamp': '2025-09-10 02:59:10.446533', 'step': 18475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:10.499618', 'step': 18475, 'epoch': 3} {'type': 'loss', 'content': 0.04835798591375351, 'timestamp': '2025-09-10 02:59:10.506049', 'step': 18476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:10.561059', 'step': 18476, 'epoch': 3} {'type': 'loss', 'content': 0.08594217151403427, 'timestamp': '2025-09-10 02:59:10.563467', 'step': 18477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:10.617233', 'step': 18477, 'epoch': 3} {'type': 'loss', 'content': 0.09948393702507019, 'timestamp': '2025-09-10 02:59:10.619434', 'step': 18478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:10.676400', 'step': 18478, 'epoch': 3} {'type': 'loss', 'content': 0.16884340345859528, 'timestamp': '2025-09-10 02:59:10.685606', 'step': 18479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:10.739378', 'step': 18479, 'epoch': 3} {'type': 'loss', 'content': 0.023767026141285896, 'timestamp': '2025-09-10 02:59:10.745159', 'step': 18480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:10.799611', 'step': 18480, 'epoch': 3} {'type': 'loss', 'content': 0.095903180539608, 'timestamp': '2025-09-10 02:59:10.802851', 'step': 18481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:10.857497', 'step': 18481, 'epoch': 3} {'type': 'loss', 'content': 0.107343889772892, 'timestamp': '2025-09-10 02:59:10.859878', 'step': 18482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:10.923952', 'step': 18482, 'epoch': 3} {'type': 'loss', 'content': 0.07868802547454834, 'timestamp': '2025-09-10 02:59:10.926091', 'step': 18483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:10.984763', 'step': 18483, 'epoch': 3} {'type': 'loss', 'content': 0.08640236407518387, 'timestamp': '2025-09-10 02:59:10.990602', 'step': 18484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:11.043036', 'step': 18484, 'epoch': 3} {'type': 'loss', 'content': 0.06842254102230072, 'timestamp': '2025-09-10 02:59:11.045443', 'step': 18485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:11.098687', 'step': 18485, 'epoch': 3} {'type': 'loss', 'content': 0.12500223517417908, 'timestamp': '2025-09-10 02:59:11.104060', 'step': 18486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:11.165745', 'step': 18486, 'epoch': 3} {'type': 'loss', 'content': 0.03312770649790764, 'timestamp': '2025-09-10 02:59:11.167739', 'step': 18487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:11.224720', 'step': 18487, 'epoch': 3} {'type': 'loss', 'content': 0.06132713332772255, 'timestamp': '2025-09-10 02:59:11.230569', 'step': 18488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:11.284331', 'step': 18488, 'epoch': 3} {'type': 'loss', 'content': 0.12481244653463364, 'timestamp': '2025-09-10 02:59:11.286740', 'step': 18489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:11.346134', 'step': 18489, 'epoch': 3} {'type': 'loss', 'content': 0.13305771350860596, 'timestamp': '2025-09-10 02:59:11.347888', 'step': 18490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:11.402181', 'step': 18490, 'epoch': 3} {'type': 'loss', 'content': 0.09073959290981293, 'timestamp': '2025-09-10 02:59:11.404502', 'step': 18491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:11.460368', 'step': 18491, 'epoch': 3} {'type': 'loss', 'content': 0.13515667617321014, 'timestamp': '2025-09-10 02:59:11.466268', 'step': 18492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:11.519987', 'step': 18492, 'epoch': 3} {'type': 'loss', 'content': 0.08736293017864227, 'timestamp': '2025-09-10 02:59:11.522272', 'step': 18493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:11.575262', 'step': 18493, 'epoch': 3} {'type': 'loss', 'content': 0.15252217650413513, 'timestamp': '2025-09-10 02:59:11.580011', 'step': 18494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:11.635836', 'step': 18494, 'epoch': 3} {'type': 'loss', 'content': 0.08531046658754349, 'timestamp': '2025-09-10 02:59:11.637732', 'step': 18495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:11.695710', 'step': 18495, 'epoch': 3} {'type': 'loss', 'content': 0.11514462530612946, 'timestamp': '2025-09-10 02:59:11.701246', 'step': 18496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:11.753795', 'step': 18496, 'epoch': 3} {'type': 'loss', 'content': 0.13554972410202026, 'timestamp': '2025-09-10 02:59:11.755648', 'step': 18497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:11.808762', 'step': 18497, 'epoch': 3} {'type': 'loss', 'content': 0.11222997307777405, 'timestamp': '2025-09-10 02:59:11.810602', 'step': 18498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:11.863766', 'step': 18498, 'epoch': 3} {'type': 'loss', 'content': 0.06807675957679749, 'timestamp': '2025-09-10 02:59:11.865853', 'step': 18499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:11.921026', 'step': 18499, 'epoch': 3} {'type': 'loss', 'content': 0.0403774119913578, 'timestamp': '2025-09-10 02:59:11.926827', 'step': 18500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 18500', 'timestamp': '2025-09-10 02:59:12.302141', 'step': 18500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:12.359892', 'step': 18500, 'epoch': 3} {'type': 'loss', 'content': 0.08779938519001007, 'timestamp': '2025-09-10 02:59:12.362123', 'step': 18501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:12.415943', 'step': 18501, 'epoch': 3} {'type': 'loss', 'content': 0.0867433026432991, 'timestamp': '2025-09-10 02:59:12.418309', 'step': 18502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:12.472648', 'step': 18502, 'epoch': 3} {'type': 'loss', 'content': 0.04758317768573761, 'timestamp': '2025-09-10 02:59:12.475241', 'step': 18503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:12.531112', 'step': 18503, 'epoch': 3} {'type': 'loss', 'content': 0.10084826499223709, 'timestamp': '2025-09-10 02:59:12.537545', 'step': 18504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:12.590175', 'step': 18504, 'epoch': 3} {'type': 'loss', 'content': 0.1514759659767151, 'timestamp': '2025-09-10 02:59:12.596433', 'step': 18505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:12.659755', 'step': 18505, 'epoch': 3} {'type': 'loss', 'content': 0.20407873392105103, 'timestamp': '2025-09-10 02:59:12.661683', 'step': 18506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:12.715832', 'step': 18506, 'epoch': 3} {'type': 'loss', 'content': 0.026620404794812202, 'timestamp': '2025-09-10 02:59:12.718372', 'step': 18507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:12.772466', 'step': 18507, 'epoch': 3} {'type': 'loss', 'content': 0.08750444650650024, 'timestamp': '2025-09-10 02:59:12.778240', 'step': 18508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:12.832941', 'step': 18508, 'epoch': 3} {'type': 'loss', 'content': 0.1156211569905281, 'timestamp': '2025-09-10 02:59:12.834844', 'step': 18509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:12.887613', 'step': 18509, 'epoch': 3} {'type': 'loss', 'content': 0.1117975190281868, 'timestamp': '2025-09-10 02:59:12.891632', 'step': 18510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:12.945621', 'step': 18510, 'epoch': 3} {'type': 'loss', 'content': 0.044010985642671585, 'timestamp': '2025-09-10 02:59:12.947708', 'step': 18511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:13.001815', 'step': 18511, 'epoch': 3} {'type': 'loss', 'content': 0.0502215214073658, 'timestamp': '2025-09-10 02:59:13.007750', 'step': 18512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:13.061376', 'step': 18512, 'epoch': 3} {'type': 'loss', 'content': 0.04344330355525017, 'timestamp': '2025-09-10 02:59:13.063355', 'step': 18513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:13.116639', 'step': 18513, 'epoch': 3} {'type': 'loss', 'content': 0.14488829672336578, 'timestamp': '2025-09-10 02:59:13.118414', 'step': 18514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:13.174020', 'step': 18514, 'epoch': 3} {'type': 'loss', 'content': 0.10304833203554153, 'timestamp': '2025-09-10 02:59:13.176072', 'step': 18515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:13.231026', 'step': 18515, 'epoch': 3} {'type': 'loss', 'content': 0.05717010796070099, 'timestamp': '2025-09-10 02:59:13.237348', 'step': 18516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:13.291781', 'step': 18516, 'epoch': 3} {'type': 'loss', 'content': 0.13693732023239136, 'timestamp': '2025-09-10 02:59:13.296470', 'step': 18517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:13.356087', 'step': 18517, 'epoch': 3} {'type': 'loss', 'content': 0.05021490529179573, 'timestamp': '2025-09-10 02:59:13.359427', 'step': 18518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:13.418136', 'step': 18518, 'epoch': 3} {'type': 'loss', 'content': 0.06130879744887352, 'timestamp': '2025-09-10 02:59:13.420435', 'step': 18519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:13.474221', 'step': 18519, 'epoch': 3} {'type': 'loss', 'content': 0.09006103128194809, 'timestamp': '2025-09-10 02:59:13.480170', 'step': 18520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:13.534244', 'step': 18520, 'epoch': 3} {'type': 'loss', 'content': 0.12203042954206467, 'timestamp': '2025-09-10 02:59:13.536361', 'step': 18521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:13.591173', 'step': 18521, 'epoch': 3} {'type': 'loss', 'content': 0.03342972695827484, 'timestamp': '2025-09-10 02:59:13.593199', 'step': 18522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:13.646454', 'step': 18522, 'epoch': 3} {'type': 'loss', 'content': 0.050522297620773315, 'timestamp': '2025-09-10 02:59:13.648432', 'step': 18523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:13.701890', 'step': 18523, 'epoch': 3} {'type': 'loss', 'content': 0.11398264765739441, 'timestamp': '2025-09-10 02:59:13.707863', 'step': 18524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:13.765611', 'step': 18524, 'epoch': 3} {'type': 'loss', 'content': 0.04359113425016403, 'timestamp': '2025-09-10 02:59:13.768311', 'step': 18525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:13.823305', 'step': 18525, 'epoch': 3} {'type': 'loss', 'content': 0.06006523221731186, 'timestamp': '2025-09-10 02:59:13.825508', 'step': 18526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:13.880619', 'step': 18526, 'epoch': 3} {'type': 'loss', 'content': 0.1895110160112381, 'timestamp': '2025-09-10 02:59:13.883213', 'step': 18527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:13.939789', 'step': 18527, 'epoch': 3} {'type': 'loss', 'content': 0.08963131159543991, 'timestamp': '2025-09-10 02:59:13.945545', 'step': 18528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:14.002207', 'step': 18528, 'epoch': 3} {'type': 'loss', 'content': 0.10544085502624512, 'timestamp': '2025-09-10 02:59:14.004450', 'step': 18529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:14.058350', 'step': 18529, 'epoch': 3} {'type': 'loss', 'content': 0.09682458639144897, 'timestamp': '2025-09-10 02:59:14.060191', 'step': 18530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:14.114314', 'step': 18530, 'epoch': 3} {'type': 'loss', 'content': 0.2097359299659729, 'timestamp': '2025-09-10 02:59:14.116190', 'step': 18531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:14.169444', 'step': 18531, 'epoch': 3} {'type': 'loss', 'content': 0.13506180047988892, 'timestamp': '2025-09-10 02:59:14.182166', 'step': 18532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:14.242750', 'step': 18532, 'epoch': 3} {'type': 'loss', 'content': 0.10311684757471085, 'timestamp': '2025-09-10 02:59:14.244812', 'step': 18533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:14.300917', 'step': 18533, 'epoch': 3} {'type': 'loss', 'content': 0.18480895459651947, 'timestamp': '2025-09-10 02:59:14.303198', 'step': 18534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:14.359728', 'step': 18534, 'epoch': 3} {'type': 'loss', 'content': 0.03449472039937973, 'timestamp': '2025-09-10 02:59:14.362021', 'step': 18535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:14.417829', 'step': 18535, 'epoch': 3} {'type': 'loss', 'content': 0.1744856983423233, 'timestamp': '2025-09-10 02:59:14.423795', 'step': 18536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:14.477501', 'step': 18536, 'epoch': 3} {'type': 'loss', 'content': 0.0727546364068985, 'timestamp': '2025-09-10 02:59:14.479576', 'step': 18537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:14.536449', 'step': 18537, 'epoch': 3} {'type': 'loss', 'content': 0.10603661090135574, 'timestamp': '2025-09-10 02:59:14.538253', 'step': 18538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:14.593700', 'step': 18538, 'epoch': 3} {'type': 'loss', 'content': 0.06912888586521149, 'timestamp': '2025-09-10 02:59:14.595624', 'step': 18539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:14.649230', 'step': 18539, 'epoch': 3} {'type': 'loss', 'content': 0.11775107681751251, 'timestamp': '2025-09-10 02:59:14.654821', 'step': 18540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:14.707890', 'step': 18540, 'epoch': 3} {'type': 'loss', 'content': 0.1296331286430359, 'timestamp': '2025-09-10 02:59:14.710719', 'step': 18541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:14.768751', 'step': 18541, 'epoch': 3} {'type': 'loss', 'content': 0.09709333628416061, 'timestamp': '2025-09-10 02:59:14.770725', 'step': 18542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:14.824509', 'step': 18542, 'epoch': 3} {'type': 'loss', 'content': 0.07874595373868942, 'timestamp': '2025-09-10 02:59:14.826816', 'step': 18543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:14.886892', 'step': 18543, 'epoch': 3} {'type': 'loss', 'content': 0.1629369705915451, 'timestamp': '2025-09-10 02:59:14.892726', 'step': 18544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:14.947448', 'step': 18544, 'epoch': 3} {'type': 'loss', 'content': 0.09640714526176453, 'timestamp': '2025-09-10 02:59:14.949495', 'step': 18545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:15.003065', 'step': 18545, 'epoch': 3} {'type': 'loss', 'content': 0.022636927664279938, 'timestamp': '2025-09-10 02:59:15.005207', 'step': 18546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:15.061349', 'step': 18546, 'epoch': 3} {'type': 'loss', 'content': 0.0269942544400692, 'timestamp': '2025-09-10 02:59:15.063300', 'step': 18547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:15.121335', 'step': 18547, 'epoch': 3} {'type': 'loss', 'content': 0.07741731405258179, 'timestamp': '2025-09-10 02:59:15.127200', 'step': 18548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:15.180742', 'step': 18548, 'epoch': 3} {'type': 'loss', 'content': 0.1252189427614212, 'timestamp': '2025-09-10 02:59:15.182584', 'step': 18549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:15.236293', 'step': 18549, 'epoch': 3} {'type': 'loss', 'content': 0.10160534083843231, 'timestamp': '2025-09-10 02:59:15.241775', 'step': 18550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:15.296507', 'step': 18550, 'epoch': 3} {'type': 'loss', 'content': 0.1099817156791687, 'timestamp': '2025-09-10 02:59:15.298929', 'step': 18551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:15.353165', 'step': 18551, 'epoch': 3} {'type': 'loss', 'content': 0.17058557271957397, 'timestamp': '2025-09-10 02:59:15.365128', 'step': 18552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:15.423926', 'step': 18552, 'epoch': 3} {'type': 'loss', 'content': 0.12286876142024994, 'timestamp': '2025-09-10 02:59:15.426176', 'step': 18553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:15.479502', 'step': 18553, 'epoch': 3} {'type': 'loss', 'content': 0.09910579770803452, 'timestamp': '2025-09-10 02:59:15.481546', 'step': 18554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:15.534776', 'step': 18554, 'epoch': 3} {'type': 'loss', 'content': 0.03208279237151146, 'timestamp': '2025-09-10 02:59:15.536570', 'step': 18555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:15.597015', 'step': 18555, 'epoch': 3} {'type': 'loss', 'content': 0.1331886500120163, 'timestamp': '2025-09-10 02:59:15.602569', 'step': 18556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:15.662068', 'step': 18556, 'epoch': 3} {'type': 'loss', 'content': 0.06700250506401062, 'timestamp': '2025-09-10 02:59:15.663991', 'step': 18557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:15.721722', 'step': 18557, 'epoch': 3} {'type': 'loss', 'content': 0.09320078790187836, 'timestamp': '2025-09-10 02:59:15.724468', 'step': 18558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:15.782580', 'step': 18558, 'epoch': 3} {'type': 'loss', 'content': 0.11320597678422928, 'timestamp': '2025-09-10 02:59:15.785125', 'step': 18559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:15.841070', 'step': 18559, 'epoch': 3} {'type': 'loss', 'content': 0.1529325693845749, 'timestamp': '2025-09-10 02:59:15.850882', 'step': 18560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:15.915926', 'step': 18560, 'epoch': 3} {'type': 'loss', 'content': 0.08701545745134354, 'timestamp': '2025-09-10 02:59:15.918184', 'step': 18561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:15.972351', 'step': 18561, 'epoch': 3} {'type': 'loss', 'content': 0.09700686484575272, 'timestamp': '2025-09-10 02:59:15.974191', 'step': 18562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:16.032473', 'step': 18562, 'epoch': 3} {'type': 'loss', 'content': 0.07200402021408081, 'timestamp': '2025-09-10 02:59:16.034265', 'step': 18563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:16.092403', 'step': 18563, 'epoch': 3} {'type': 'loss', 'content': 0.08775421231985092, 'timestamp': '2025-09-10 02:59:16.098258', 'step': 18564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:16.155978', 'step': 18564, 'epoch': 3} {'type': 'loss', 'content': 0.06756733357906342, 'timestamp': '2025-09-10 02:59:16.158374', 'step': 18565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:16.213035', 'step': 18565, 'epoch': 3} {'type': 'loss', 'content': 0.07349036633968353, 'timestamp': '2025-09-10 02:59:16.215294', 'step': 18566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:16.268727', 'step': 18566, 'epoch': 3} {'type': 'loss', 'content': 0.04881254583597183, 'timestamp': '2025-09-10 02:59:16.278065', 'step': 18567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:16.340646', 'step': 18567, 'epoch': 3} {'type': 'loss', 'content': 0.1388600766658783, 'timestamp': '2025-09-10 02:59:16.346681', 'step': 18568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:16.400654', 'step': 18568, 'epoch': 3} {'type': 'loss', 'content': 0.08171648532152176, 'timestamp': '2025-09-10 02:59:16.402907', 'step': 18569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:16.457380', 'step': 18569, 'epoch': 3} {'type': 'loss', 'content': 0.18109534680843353, 'timestamp': '2025-09-10 02:59:16.459592', 'step': 18570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:16.518716', 'step': 18570, 'epoch': 3} {'type': 'loss', 'content': 0.08837678283452988, 'timestamp': '2025-09-10 02:59:16.526403', 'step': 18571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:59:16.592181', 'step': 18571, 'epoch': 3} {'type': 'loss', 'content': 0.07506226003170013, 'timestamp': '2025-09-10 02:59:16.603517', 'step': 18572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:16.661660', 'step': 18572, 'epoch': 3} {'type': 'loss', 'content': 0.04491795599460602, 'timestamp': '2025-09-10 02:59:16.663880', 'step': 18573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:16.718415', 'step': 18573, 'epoch': 3} {'type': 'loss', 'content': 0.11139063537120819, 'timestamp': '2025-09-10 02:59:16.720618', 'step': 18574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:16.779517', 'step': 18574, 'epoch': 3} {'type': 'loss', 'content': 0.044946420937776566, 'timestamp': '2025-09-10 02:59:16.781628', 'step': 18575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:16.842734', 'step': 18575, 'epoch': 3} {'type': 'loss', 'content': 0.14498746395111084, 'timestamp': '2025-09-10 02:59:16.848971', 'step': 18576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:16.902981', 'step': 18576, 'epoch': 3} {'type': 'loss', 'content': 0.11888594180345535, 'timestamp': '2025-09-10 02:59:16.905229', 'step': 18577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:16.962011', 'step': 18577, 'epoch': 3} {'type': 'loss', 'content': 0.058849748224020004, 'timestamp': '2025-09-10 02:59:16.964414', 'step': 18578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:17.018238', 'step': 18578, 'epoch': 3} {'type': 'loss', 'content': 0.08935488760471344, 'timestamp': '2025-09-10 02:59:17.020668', 'step': 18579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:17.073605', 'step': 18579, 'epoch': 3} {'type': 'loss', 'content': 0.18815769255161285, 'timestamp': '2025-09-10 02:59:17.079592', 'step': 18580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:17.136341', 'step': 18580, 'epoch': 3} {'type': 'loss', 'content': 0.1041153073310852, 'timestamp': '2025-09-10 02:59:17.138697', 'step': 18581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:17.195456', 'step': 18581, 'epoch': 3} {'type': 'loss', 'content': 0.10203737020492554, 'timestamp': '2025-09-10 02:59:17.197686', 'step': 18582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:17.251208', 'step': 18582, 'epoch': 3} {'type': 'loss', 'content': 0.17036989331245422, 'timestamp': '2025-09-10 02:59:17.253446', 'step': 18583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:17.306673', 'step': 18583, 'epoch': 3} {'type': 'loss', 'content': 0.09131062030792236, 'timestamp': '2025-09-10 02:59:17.312630', 'step': 18584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:59:17.365368', 'step': 18584, 'epoch': 3} {'type': 'loss', 'content': 0.08520328998565674, 'timestamp': '2025-09-10 02:59:17.367640', 'step': 18585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:17.420290', 'step': 18585, 'epoch': 3} {'type': 'loss', 'content': 0.13737161457538605, 'timestamp': '2025-09-10 02:59:17.423945', 'step': 18586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:17.479055', 'step': 18586, 'epoch': 3} {'type': 'loss', 'content': 0.11078479886054993, 'timestamp': '2025-09-10 02:59:17.482981', 'step': 18587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:17.538181', 'step': 18587, 'epoch': 3} {'type': 'loss', 'content': 0.05477266386151314, 'timestamp': '2025-09-10 02:59:17.544097', 'step': 18588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:17.602512', 'step': 18588, 'epoch': 3} {'type': 'loss', 'content': 0.03758656606078148, 'timestamp': '2025-09-10 02:59:17.605491', 'step': 18589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:17.659850', 'step': 18589, 'epoch': 3} {'type': 'loss', 'content': 0.11603306233882904, 'timestamp': '2025-09-10 02:59:17.662251', 'step': 18590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:17.716200', 'step': 18590, 'epoch': 3} {'type': 'loss', 'content': 0.05744488909840584, 'timestamp': '2025-09-10 02:59:17.722905', 'step': 18591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:17.779907', 'step': 18591, 'epoch': 3} {'type': 'loss', 'content': 0.10576606541872025, 'timestamp': '2025-09-10 02:59:17.794614', 'step': 18592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:17.848438', 'step': 18592, 'epoch': 3} {'type': 'loss', 'content': 0.05591687560081482, 'timestamp': '2025-09-10 02:59:17.850805', 'step': 18593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:17.906420', 'step': 18593, 'epoch': 3} {'type': 'loss', 'content': 0.08941417932510376, 'timestamp': '2025-09-10 02:59:17.911235', 'step': 18594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:17.968164', 'step': 18594, 'epoch': 3} {'type': 'loss', 'content': 0.1626281440258026, 'timestamp': '2025-09-10 02:59:17.970408', 'step': 18595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:18.024357', 'step': 18595, 'epoch': 3} {'type': 'loss', 'content': 0.012465829961001873, 'timestamp': '2025-09-10 02:59:18.030283', 'step': 18596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:18.083655', 'step': 18596, 'epoch': 3} {'type': 'loss', 'content': 0.1022176593542099, 'timestamp': '2025-09-10 02:59:18.085843', 'step': 18597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:18.142249', 'step': 18597, 'epoch': 3} {'type': 'loss', 'content': 0.06863562762737274, 'timestamp': '2025-09-10 02:59:18.144515', 'step': 18598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:18.200228', 'step': 18598, 'epoch': 3} {'type': 'loss', 'content': 0.11391127109527588, 'timestamp': '2025-09-10 02:59:18.203352', 'step': 18599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:18.257465', 'step': 18599, 'epoch': 3} {'type': 'loss', 'content': 0.18408796191215515, 'timestamp': '2025-09-10 02:59:18.263297', 'step': 18600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:59:18.317821', 'step': 18600, 'epoch': 3} {'type': 'loss', 'content': 0.05907813832163811, 'timestamp': '2025-09-10 02:59:18.320166', 'step': 18601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:18.375085', 'step': 18601, 'epoch': 3} {'type': 'loss', 'content': 0.05288210138678551, 'timestamp': '2025-09-10 02:59:18.377574', 'step': 18602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:18.431808', 'step': 18602, 'epoch': 3} {'type': 'loss', 'content': 0.11203702539205551, 'timestamp': '2025-09-10 02:59:18.434756', 'step': 18603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:18.495511', 'step': 18603, 'epoch': 3} {'type': 'loss', 'content': 0.10522589087486267, 'timestamp': '2025-09-10 02:59:18.501329', 'step': 18604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:18.560989', 'step': 18604, 'epoch': 3} {'type': 'loss', 'content': 0.07975517958402634, 'timestamp': '2025-09-10 02:59:18.563301', 'step': 18605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:18.624677', 'step': 18605, 'epoch': 3} {'type': 'loss', 'content': 0.2355203777551651, 'timestamp': '2025-09-10 02:59:18.626724', 'step': 18606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:18.686595', 'step': 18606, 'epoch': 3} {'type': 'loss', 'content': 0.06237967684864998, 'timestamp': '2025-09-10 02:59:18.688835', 'step': 18607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:18.742246', 'step': 18607, 'epoch': 3} {'type': 'loss', 'content': 0.06862565875053406, 'timestamp': '2025-09-10 02:59:18.752625', 'step': 18608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:18.809936', 'step': 18608, 'epoch': 3} {'type': 'loss', 'content': 0.10752645879983902, 'timestamp': '2025-09-10 02:59:18.812233', 'step': 18609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:18.881866', 'step': 18609, 'epoch': 3} {'type': 'loss', 'content': 0.1384395807981491, 'timestamp': '2025-09-10 02:59:18.884183', 'step': 18610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:18.939194', 'step': 18610, 'epoch': 3} {'type': 'loss', 'content': 0.09193440526723862, 'timestamp': '2025-09-10 02:59:18.941441', 'step': 18611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:18.995664', 'step': 18611, 'epoch': 3} {'type': 'loss', 'content': 0.0870787501335144, 'timestamp': '2025-09-10 02:59:19.001533', 'step': 18612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:19.055008', 'step': 18612, 'epoch': 3} {'type': 'loss', 'content': 0.08003023266792297, 'timestamp': '2025-09-10 02:59:19.059294', 'step': 18613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:19.114628', 'step': 18613, 'epoch': 3} {'type': 'loss', 'content': 0.13951976597309113, 'timestamp': '2025-09-10 02:59:19.116734', 'step': 18614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:19.169998', 'step': 18614, 'epoch': 3} {'type': 'loss', 'content': 0.170495867729187, 'timestamp': '2025-09-10 02:59:19.172212', 'step': 18615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:19.228668', 'step': 18615, 'epoch': 3} {'type': 'loss', 'content': 0.039068978279829025, 'timestamp': '2025-09-10 02:59:19.237775', 'step': 18616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:59:19.297287', 'step': 18616, 'epoch': 3} {'type': 'loss', 'content': 0.03335779905319214, 'timestamp': '2025-09-10 02:59:19.299357', 'step': 18617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:19.354112', 'step': 18617, 'epoch': 3} {'type': 'loss', 'content': 0.05089525878429413, 'timestamp': '2025-09-10 02:59:19.356366', 'step': 18618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:19.415118', 'step': 18618, 'epoch': 3} {'type': 'loss', 'content': 0.0687430128455162, 'timestamp': '2025-09-10 02:59:19.420294', 'step': 18619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:19.475783', 'step': 18619, 'epoch': 3} {'type': 'loss', 'content': 0.05396157503128052, 'timestamp': '2025-09-10 02:59:19.485733', 'step': 18620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:19.542207', 'step': 18620, 'epoch': 3} {'type': 'loss', 'content': 0.12732098996639252, 'timestamp': '2025-09-10 02:59:19.545539', 'step': 18621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:19.601177', 'step': 18621, 'epoch': 3} {'type': 'loss', 'content': 0.0925721675157547, 'timestamp': '2025-09-10 02:59:19.605158', 'step': 18622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:19.665694', 'step': 18622, 'epoch': 3} {'type': 'loss', 'content': 0.08077524602413177, 'timestamp': '2025-09-10 02:59:19.672746', 'step': 18623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:19.742702', 'step': 18623, 'epoch': 3} {'type': 'loss', 'content': 0.12624891102313995, 'timestamp': '2025-09-10 02:59:19.753912', 'step': 18624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:19.808355', 'step': 18624, 'epoch': 3} {'type': 'loss', 'content': 0.13613754510879517, 'timestamp': '2025-09-10 02:59:19.817292', 'step': 18625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:19.881729', 'step': 18625, 'epoch': 3} {'type': 'loss', 'content': 0.07149066030979156, 'timestamp': '2025-09-10 02:59:19.884008', 'step': 18626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:19.940369', 'step': 18626, 'epoch': 3} {'type': 'loss', 'content': 0.18503159284591675, 'timestamp': '2025-09-10 02:59:19.942635', 'step': 18627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:19.997506', 'step': 18627, 'epoch': 3} {'type': 'loss', 'content': 0.11713994294404984, 'timestamp': '2025-09-10 02:59:20.003456', 'step': 18628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:20.059493', 'step': 18628, 'epoch': 3} {'type': 'loss', 'content': 0.11561036854982376, 'timestamp': '2025-09-10 02:59:20.061753', 'step': 18629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:20.116559', 'step': 18629, 'epoch': 3} {'type': 'loss', 'content': 0.041651755571365356, 'timestamp': '2025-09-10 02:59:20.118727', 'step': 18630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:20.172063', 'step': 18630, 'epoch': 3} {'type': 'loss', 'content': 0.10453380644321442, 'timestamp': '2025-09-10 02:59:20.174345', 'step': 18631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:20.226981', 'step': 18631, 'epoch': 3} {'type': 'loss', 'content': 0.050964489579200745, 'timestamp': '2025-09-10 02:59:20.232643', 'step': 18632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:20.285582', 'step': 18632, 'epoch': 3} {'type': 'loss', 'content': 0.13289283215999603, 'timestamp': '2025-09-10 02:59:20.289221', 'step': 18633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:20.344671', 'step': 18633, 'epoch': 3} {'type': 'loss', 'content': 0.12882468104362488, 'timestamp': '2025-09-10 02:59:20.346666', 'step': 18634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:20.402254', 'step': 18634, 'epoch': 3} {'type': 'loss', 'content': 0.13770155608654022, 'timestamp': '2025-09-10 02:59:20.406512', 'step': 18635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:20.473370', 'step': 18635, 'epoch': 3} {'type': 'loss', 'content': 0.16621588170528412, 'timestamp': '2025-09-10 02:59:20.479717', 'step': 18636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:20.532222', 'step': 18636, 'epoch': 3} {'type': 'loss', 'content': 0.05406579375267029, 'timestamp': '2025-09-10 02:59:20.534406', 'step': 18637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:20.588088', 'step': 18637, 'epoch': 3} {'type': 'loss', 'content': 0.05069320648908615, 'timestamp': '2025-09-10 02:59:20.590051', 'step': 18638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:20.646789', 'step': 18638, 'epoch': 3} {'type': 'loss', 'content': 0.11190079152584076, 'timestamp': '2025-09-10 02:59:20.648905', 'step': 18639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:20.704610', 'step': 18639, 'epoch': 3} {'type': 'loss', 'content': 0.07891522347927094, 'timestamp': '2025-09-10 02:59:20.710273', 'step': 18640, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 02:59:34.666345', 'step': 18640, 'epoch': 3} {'type': 'pplx', 'content': 11435.44243007506, 'timestamp': '2025-09-10 02:59:34.669648', 'step': 18640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:34.723621', 'step': 18640, 'epoch': 3} {'type': 'loss', 'content': 0.12504123151302338, 'timestamp': '2025-09-10 02:59:34.725948', 'step': 18641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:34.780230', 'step': 18641, 'epoch': 3} {'type': 'loss', 'content': 0.026586249470710754, 'timestamp': '2025-09-10 02:59:34.782513', 'step': 18642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:34.835897', 'step': 18642, 'epoch': 3} {'type': 'loss', 'content': 0.12493374198675156, 'timestamp': '2025-09-10 02:59:34.838157', 'step': 18643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:34.894743', 'step': 18643, 'epoch': 3} {'type': 'loss', 'content': 0.10304071754217148, 'timestamp': '2025-09-10 02:59:34.901412', 'step': 18644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:34.957289', 'step': 18644, 'epoch': 3} {'type': 'loss', 'content': 0.11152530461549759, 'timestamp': '2025-09-10 02:59:34.959766', 'step': 18645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:35.013349', 'step': 18645, 'epoch': 3} {'type': 'loss', 'content': 0.12771877646446228, 'timestamp': '2025-09-10 02:59:35.015712', 'step': 18646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:35.071626', 'step': 18646, 'epoch': 3} {'type': 'loss', 'content': 0.09558124840259552, 'timestamp': '2025-09-10 02:59:35.073809', 'step': 18647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:35.128014', 'step': 18647, 'epoch': 3} {'type': 'loss', 'content': 0.10685061663389206, 'timestamp': '2025-09-10 02:59:35.134199', 'step': 18648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:35.188375', 'step': 18648, 'epoch': 3} {'type': 'loss', 'content': 0.034234799444675446, 'timestamp': '2025-09-10 02:59:35.190495', 'step': 18649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:35.243883', 'step': 18649, 'epoch': 3} {'type': 'loss', 'content': 0.10885025560855865, 'timestamp': '2025-09-10 02:59:35.246135', 'step': 18650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:35.299918', 'step': 18650, 'epoch': 3} {'type': 'loss', 'content': 0.10354379564523697, 'timestamp': '2025-09-10 02:59:35.302047', 'step': 18651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:35.355650', 'step': 18651, 'epoch': 3} {'type': 'loss', 'content': 0.18194468319416046, 'timestamp': '2025-09-10 02:59:35.361651', 'step': 18652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:35.414052', 'step': 18652, 'epoch': 3} {'type': 'loss', 'content': 0.13819918036460876, 'timestamp': '2025-09-10 02:59:35.416249', 'step': 18653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:35.469500', 'step': 18653, 'epoch': 3} {'type': 'loss', 'content': 0.0878099575638771, 'timestamp': '2025-09-10 02:59:35.471967', 'step': 18654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:35.528733', 'step': 18654, 'epoch': 3} {'type': 'loss', 'content': 0.06606636941432953, 'timestamp': '2025-09-10 02:59:35.531065', 'step': 18655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:35.585477', 'step': 18655, 'epoch': 3} {'type': 'loss', 'content': 0.11355728656053543, 'timestamp': '2025-09-10 02:59:35.591538', 'step': 18656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:35.645196', 'step': 18656, 'epoch': 3} {'type': 'loss', 'content': 0.09928932785987854, 'timestamp': '2025-09-10 02:59:35.647446', 'step': 18657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:35.701229', 'step': 18657, 'epoch': 3} {'type': 'loss', 'content': 0.016212232410907745, 'timestamp': '2025-09-10 02:59:35.703456', 'step': 18658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:35.757303', 'step': 18658, 'epoch': 3} {'type': 'loss', 'content': 0.09649813175201416, 'timestamp': '2025-09-10 02:59:35.759687', 'step': 18659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:35.812917', 'step': 18659, 'epoch': 3} {'type': 'loss', 'content': 0.09693407267332077, 'timestamp': '2025-09-10 02:59:35.818898', 'step': 18660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:35.872309', 'step': 18660, 'epoch': 3} {'type': 'loss', 'content': 0.09746234118938446, 'timestamp': '2025-09-10 02:59:35.874479', 'step': 18661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:35.927526', 'step': 18661, 'epoch': 3} {'type': 'loss', 'content': 0.049296364188194275, 'timestamp': '2025-09-10 02:59:35.929893', 'step': 18662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:35.983357', 'step': 18662, 'epoch': 3} {'type': 'loss', 'content': 0.11364737153053284, 'timestamp': '2025-09-10 02:59:35.985553', 'step': 18663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:36.038177', 'step': 18663, 'epoch': 3} {'type': 'loss', 'content': 0.1360938996076584, 'timestamp': '2025-09-10 02:59:36.044110', 'step': 18664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:36.096965', 'step': 18664, 'epoch': 3} {'type': 'loss', 'content': 0.0958297997713089, 'timestamp': '2025-09-10 02:59:36.099111', 'step': 18665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:36.153272', 'step': 18665, 'epoch': 3} {'type': 'loss', 'content': 0.08813159167766571, 'timestamp': '2025-09-10 02:59:36.155512', 'step': 18666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:36.209934', 'step': 18666, 'epoch': 3} {'type': 'loss', 'content': 0.07900525629520416, 'timestamp': '2025-09-10 02:59:36.212118', 'step': 18667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:36.265685', 'step': 18667, 'epoch': 3} {'type': 'loss', 'content': 0.08541031926870346, 'timestamp': '2025-09-10 02:59:36.271603', 'step': 18668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:36.324036', 'step': 18668, 'epoch': 3} {'type': 'loss', 'content': 0.11662155389785767, 'timestamp': '2025-09-10 02:59:36.326199', 'step': 18669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:36.379526', 'step': 18669, 'epoch': 3} {'type': 'loss', 'content': 0.09380576014518738, 'timestamp': '2025-09-10 02:59:36.381664', 'step': 18670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:36.434423', 'step': 18670, 'epoch': 3} {'type': 'loss', 'content': 0.10982764512300491, 'timestamp': '2025-09-10 02:59:36.436629', 'step': 18671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:36.489801', 'step': 18671, 'epoch': 3} {'type': 'loss', 'content': 0.06207618489861488, 'timestamp': '2025-09-10 02:59:36.496124', 'step': 18672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:36.550346', 'step': 18672, 'epoch': 3} {'type': 'loss', 'content': 0.08788836002349854, 'timestamp': '2025-09-10 02:59:36.552687', 'step': 18673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:36.607687', 'step': 18673, 'epoch': 3} {'type': 'loss', 'content': 0.07194606214761734, 'timestamp': '2025-09-10 02:59:36.609852', 'step': 18674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:36.664139', 'step': 18674, 'epoch': 3} {'type': 'loss', 'content': 0.06753399968147278, 'timestamp': '2025-09-10 02:59:36.666305', 'step': 18675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:36.719410', 'step': 18675, 'epoch': 3} {'type': 'loss', 'content': 0.0551924966275692, 'timestamp': '2025-09-10 02:59:36.725161', 'step': 18676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:36.777629', 'step': 18676, 'epoch': 3} {'type': 'loss', 'content': 0.06173120439052582, 'timestamp': '2025-09-10 02:59:36.780367', 'step': 18677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:59:36.835115', 'step': 18677, 'epoch': 3} {'type': 'loss', 'content': 0.13459008932113647, 'timestamp': '2025-09-10 02:59:36.837355', 'step': 18678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:36.891870', 'step': 18678, 'epoch': 3} {'type': 'loss', 'content': 0.044834062457084656, 'timestamp': '2025-09-10 02:59:36.894052', 'step': 18679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:36.947960', 'step': 18679, 'epoch': 3} {'type': 'loss', 'content': 0.03896557539701462, 'timestamp': '2025-09-10 02:59:36.958208', 'step': 18680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:37.015431', 'step': 18680, 'epoch': 3} {'type': 'loss', 'content': 0.0736255794763565, 'timestamp': '2025-09-10 02:59:37.020320', 'step': 18681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:37.077296', 'step': 18681, 'epoch': 3} {'type': 'loss', 'content': 0.06590507179498672, 'timestamp': '2025-09-10 02:59:37.079411', 'step': 18682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:37.133206', 'step': 18682, 'epoch': 3} {'type': 'loss', 'content': 0.07427073270082474, 'timestamp': '2025-09-10 02:59:37.135510', 'step': 18683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:37.189671', 'step': 18683, 'epoch': 3} {'type': 'loss', 'content': 0.0704561397433281, 'timestamp': '2025-09-10 02:59:37.195748', 'step': 18684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:37.248954', 'step': 18684, 'epoch': 3} {'type': 'loss', 'content': 0.032692499458789825, 'timestamp': '2025-09-10 02:59:37.251073', 'step': 18685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:37.306069', 'step': 18685, 'epoch': 3} {'type': 'loss', 'content': 0.05196882411837578, 'timestamp': '2025-09-10 02:59:37.308195', 'step': 18686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:37.361659', 'step': 18686, 'epoch': 3} {'type': 'loss', 'content': 0.05675746127963066, 'timestamp': '2025-09-10 02:59:37.363937', 'step': 18687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:37.419747', 'step': 18687, 'epoch': 3} {'type': 'loss', 'content': 0.10975532978773117, 'timestamp': '2025-09-10 02:59:37.426057', 'step': 18688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:37.481196', 'step': 18688, 'epoch': 3} {'type': 'loss', 'content': 0.10473135113716125, 'timestamp': '2025-09-10 02:59:37.484031', 'step': 18689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:37.537324', 'step': 18689, 'epoch': 3} {'type': 'loss', 'content': 0.06302734464406967, 'timestamp': '2025-09-10 02:59:37.539516', 'step': 18690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:37.593358', 'step': 18690, 'epoch': 3} {'type': 'loss', 'content': 0.060093529522418976, 'timestamp': '2025-09-10 02:59:37.596373', 'step': 18691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:37.651528', 'step': 18691, 'epoch': 3} {'type': 'loss', 'content': 0.11212533712387085, 'timestamp': '2025-09-10 02:59:37.657352', 'step': 18692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:37.712106', 'step': 18692, 'epoch': 3} {'type': 'loss', 'content': 0.05391850695014, 'timestamp': '2025-09-10 02:59:37.714362', 'step': 18693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:37.769917', 'step': 18693, 'epoch': 3} {'type': 'loss', 'content': 0.13378474116325378, 'timestamp': '2025-09-10 02:59:37.772029', 'step': 18694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:37.826203', 'step': 18694, 'epoch': 3} {'type': 'loss', 'content': 0.022501209750771523, 'timestamp': '2025-09-10 02:59:37.828300', 'step': 18695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:37.884864', 'step': 18695, 'epoch': 3} {'type': 'loss', 'content': 0.028066962957382202, 'timestamp': '2025-09-10 02:59:37.890863', 'step': 18696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:37.944361', 'step': 18696, 'epoch': 3} {'type': 'loss', 'content': 0.08571717888116837, 'timestamp': '2025-09-10 02:59:37.946511', 'step': 18697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:37.999970', 'step': 18697, 'epoch': 3} {'type': 'loss', 'content': 0.08895276486873627, 'timestamp': '2025-09-10 02:59:38.002326', 'step': 18698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:38.055690', 'step': 18698, 'epoch': 3} {'type': 'loss', 'content': 0.0677436888217926, 'timestamp': '2025-09-10 02:59:38.057853', 'step': 18699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:38.110746', 'step': 18699, 'epoch': 3} {'type': 'loss', 'content': 0.09674438089132309, 'timestamp': '2025-09-10 02:59:38.116547', 'step': 18700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:38.168982', 'step': 18700, 'epoch': 3} {'type': 'loss', 'content': 0.0834178775548935, 'timestamp': '2025-09-10 02:59:38.171158', 'step': 18701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:38.224255', 'step': 18701, 'epoch': 3} {'type': 'loss', 'content': 0.058923784643411636, 'timestamp': '2025-09-10 02:59:38.226654', 'step': 18702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:38.281616', 'step': 18702, 'epoch': 3} {'type': 'loss', 'content': 0.08048540353775024, 'timestamp': '2025-09-10 02:59:38.283953', 'step': 18703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:38.338172', 'step': 18703, 'epoch': 3} {'type': 'loss', 'content': 0.09481268376111984, 'timestamp': '2025-09-10 02:59:38.344352', 'step': 18704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:38.401209', 'step': 18704, 'epoch': 3} {'type': 'loss', 'content': 0.1433997005224228, 'timestamp': '2025-09-10 02:59:38.403371', 'step': 18705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:38.457500', 'step': 18705, 'epoch': 3} {'type': 'loss', 'content': 0.035742878913879395, 'timestamp': '2025-09-10 02:59:38.459678', 'step': 18706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:38.514193', 'step': 18706, 'epoch': 3} {'type': 'loss', 'content': 0.09445832669734955, 'timestamp': '2025-09-10 02:59:38.516495', 'step': 18707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:59:38.570667', 'step': 18707, 'epoch': 3} {'type': 'loss', 'content': 0.11393298208713531, 'timestamp': '2025-09-10 02:59:38.576577', 'step': 18708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:38.629091', 'step': 18708, 'epoch': 3} {'type': 'loss', 'content': 0.0973685011267662, 'timestamp': '2025-09-10 02:59:38.631184', 'step': 18709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:38.684970', 'step': 18709, 'epoch': 3} {'type': 'loss', 'content': 0.06825602054595947, 'timestamp': '2025-09-10 02:59:38.687042', 'step': 18710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:38.740345', 'step': 18710, 'epoch': 3} {'type': 'loss', 'content': 0.10449539870023727, 'timestamp': '2025-09-10 02:59:38.742481', 'step': 18711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:38.796161', 'step': 18711, 'epoch': 3} {'type': 'loss', 'content': 0.05104171112179756, 'timestamp': '2025-09-10 02:59:38.802156', 'step': 18712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:38.871951', 'step': 18712, 'epoch': 3} {'type': 'loss', 'content': 0.12066767364740372, 'timestamp': '2025-09-10 02:59:38.874114', 'step': 18713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:38.926537', 'step': 18713, 'epoch': 3} {'type': 'loss', 'content': 0.15595734119415283, 'timestamp': '2025-09-10 02:59:38.928658', 'step': 18714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:38.981620', 'step': 18714, 'epoch': 3} {'type': 'loss', 'content': 0.09532444924116135, 'timestamp': '2025-09-10 02:59:38.983748', 'step': 18715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:39.039171', 'step': 18715, 'epoch': 3} {'type': 'loss', 'content': 0.09463486075401306, 'timestamp': '2025-09-10 02:59:39.045286', 'step': 18716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:39.098447', 'step': 18716, 'epoch': 3} {'type': 'loss', 'content': 0.02139177732169628, 'timestamp': '2025-09-10 02:59:39.100678', 'step': 18717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:39.153616', 'step': 18717, 'epoch': 3} {'type': 'loss', 'content': 0.06432319432497025, 'timestamp': '2025-09-10 02:59:39.155736', 'step': 18718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:39.209291', 'step': 18718, 'epoch': 3} {'type': 'loss', 'content': 0.128770112991333, 'timestamp': '2025-09-10 02:59:39.211424', 'step': 18719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:39.264443', 'step': 18719, 'epoch': 3} {'type': 'loss', 'content': 0.0447489395737648, 'timestamp': '2025-09-10 02:59:39.270284', 'step': 18720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:39.323570', 'step': 18720, 'epoch': 3} {'type': 'loss', 'content': 0.05329098924994469, 'timestamp': '2025-09-10 02:59:39.325706', 'step': 18721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:39.379844', 'step': 18721, 'epoch': 3} {'type': 'loss', 'content': 0.05253222957253456, 'timestamp': '2025-09-10 02:59:39.382116', 'step': 18722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:39.436939', 'step': 18722, 'epoch': 3} {'type': 'loss', 'content': 0.06284855306148529, 'timestamp': '2025-09-10 02:59:39.439155', 'step': 18723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:39.492657', 'step': 18723, 'epoch': 3} {'type': 'loss', 'content': 0.13681502640247345, 'timestamp': '2025-09-10 02:59:39.498577', 'step': 18724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:39.551255', 'step': 18724, 'epoch': 3} {'type': 'loss', 'content': 0.01776563562452793, 'timestamp': '2025-09-10 02:59:39.553545', 'step': 18725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:39.607106', 'step': 18725, 'epoch': 3} {'type': 'loss', 'content': 0.022040141746401787, 'timestamp': '2025-09-10 02:59:39.609299', 'step': 18726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:39.662864', 'step': 18726, 'epoch': 3} {'type': 'loss', 'content': 0.09238738566637039, 'timestamp': '2025-09-10 02:59:39.664943', 'step': 18727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:39.718141', 'step': 18727, 'epoch': 3} {'type': 'loss', 'content': 0.011722155846655369, 'timestamp': '2025-09-10 02:59:39.724120', 'step': 18728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:39.777307', 'step': 18728, 'epoch': 3} {'type': 'loss', 'content': 0.14209812879562378, 'timestamp': '2025-09-10 02:59:39.779514', 'step': 18729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:39.833329', 'step': 18729, 'epoch': 3} {'type': 'loss', 'content': 0.07544837892055511, 'timestamp': '2025-09-10 02:59:39.835615', 'step': 18730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:39.894403', 'step': 18730, 'epoch': 3} {'type': 'loss', 'content': 0.0833430364727974, 'timestamp': '2025-09-10 02:59:39.896747', 'step': 18731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:39.954990', 'step': 18731, 'epoch': 3} {'type': 'loss', 'content': 0.1425899714231491, 'timestamp': '2025-09-10 02:59:39.961713', 'step': 18732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:40.017227', 'step': 18732, 'epoch': 3} {'type': 'loss', 'content': 0.09660839289426804, 'timestamp': '2025-09-10 02:59:40.019402', 'step': 18733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:40.075059', 'step': 18733, 'epoch': 3} {'type': 'loss', 'content': 0.03724323958158493, 'timestamp': '2025-09-10 02:59:40.077299', 'step': 18734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:40.131868', 'step': 18734, 'epoch': 3} {'type': 'loss', 'content': 0.07374610006809235, 'timestamp': '2025-09-10 02:59:40.134008', 'step': 18735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:40.189438', 'step': 18735, 'epoch': 3} {'type': 'loss', 'content': 0.11856091022491455, 'timestamp': '2025-09-10 02:59:40.195744', 'step': 18736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:40.249428', 'step': 18736, 'epoch': 3} {'type': 'loss', 'content': 0.08383605629205704, 'timestamp': '2025-09-10 02:59:40.251579', 'step': 18737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:40.304947', 'step': 18737, 'epoch': 3} {'type': 'loss', 'content': 0.041092488914728165, 'timestamp': '2025-09-10 02:59:40.307045', 'step': 18738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:40.360687', 'step': 18738, 'epoch': 3} {'type': 'loss', 'content': 0.1630786508321762, 'timestamp': '2025-09-10 02:59:40.362787', 'step': 18739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:40.417349', 'step': 18739, 'epoch': 3} {'type': 'loss', 'content': 0.10974100977182388, 'timestamp': '2025-09-10 02:59:40.423563', 'step': 18740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:40.477162', 'step': 18740, 'epoch': 3} {'type': 'loss', 'content': 0.06654264777898788, 'timestamp': '2025-09-10 02:59:40.479371', 'step': 18741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:40.533196', 'step': 18741, 'epoch': 3} {'type': 'loss', 'content': 0.1015084981918335, 'timestamp': '2025-09-10 02:59:40.535409', 'step': 18742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:40.589557', 'step': 18742, 'epoch': 3} {'type': 'loss', 'content': 0.022024547681212425, 'timestamp': '2025-09-10 02:59:40.591735', 'step': 18743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:40.646210', 'step': 18743, 'epoch': 3} {'type': 'loss', 'content': 0.12963806092739105, 'timestamp': '2025-09-10 02:59:40.652519', 'step': 18744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:40.708274', 'step': 18744, 'epoch': 3} {'type': 'loss', 'content': 0.05314529314637184, 'timestamp': '2025-09-10 02:59:40.710583', 'step': 18745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:40.765082', 'step': 18745, 'epoch': 3} {'type': 'loss', 'content': 0.1407794952392578, 'timestamp': '2025-09-10 02:59:40.767428', 'step': 18746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:40.821415', 'step': 18746, 'epoch': 3} {'type': 'loss', 'content': 0.04761364683508873, 'timestamp': '2025-09-10 02:59:40.823537', 'step': 18747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:40.876613', 'step': 18747, 'epoch': 3} {'type': 'loss', 'content': 0.1744152307510376, 'timestamp': '2025-09-10 02:59:40.882497', 'step': 18748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:40.935221', 'step': 18748, 'epoch': 3} {'type': 'loss', 'content': 0.09937341511249542, 'timestamp': '2025-09-10 02:59:40.937431', 'step': 18749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:40.991209', 'step': 18749, 'epoch': 3} {'type': 'loss', 'content': 0.05398565158247948, 'timestamp': '2025-09-10 02:59:40.993446', 'step': 18750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:41.047109', 'step': 18750, 'epoch': 3} {'type': 'loss', 'content': 0.05385071039199829, 'timestamp': '2025-09-10 02:59:41.049298', 'step': 18751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:41.102248', 'step': 18751, 'epoch': 3} {'type': 'loss', 'content': 0.12280628830194473, 'timestamp': '2025-09-10 02:59:41.108394', 'step': 18752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:41.161181', 'step': 18752, 'epoch': 3} {'type': 'loss', 'content': 0.10524753481149673, 'timestamp': '2025-09-10 02:59:41.163313', 'step': 18753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:41.217907', 'step': 18753, 'epoch': 3} {'type': 'loss', 'content': 0.1299443393945694, 'timestamp': '2025-09-10 02:59:41.220031', 'step': 18754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:41.273054', 'step': 18754, 'epoch': 3} {'type': 'loss', 'content': 0.10119213908910751, 'timestamp': '2025-09-10 02:59:41.275108', 'step': 18755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:41.327663', 'step': 18755, 'epoch': 3} {'type': 'loss', 'content': 0.0833030492067337, 'timestamp': '2025-09-10 02:59:41.333608', 'step': 18756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:41.386060', 'step': 18756, 'epoch': 3} {'type': 'loss', 'content': 0.11010279506444931, 'timestamp': '2025-09-10 02:59:41.388206', 'step': 18757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:41.442763', 'step': 18757, 'epoch': 3} {'type': 'loss', 'content': 0.04640175402164459, 'timestamp': '2025-09-10 02:59:41.444819', 'step': 18758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:41.498291', 'step': 18758, 'epoch': 3} {'type': 'loss', 'content': 0.07127518206834793, 'timestamp': '2025-09-10 02:59:41.500592', 'step': 18759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:41.553669', 'step': 18759, 'epoch': 3} {'type': 'loss', 'content': 0.10877876728773117, 'timestamp': '2025-09-10 02:59:41.559868', 'step': 18760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:41.612219', 'step': 18760, 'epoch': 3} {'type': 'loss', 'content': 0.10947553813457489, 'timestamp': '2025-09-10 02:59:41.614378', 'step': 18761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:41.667621', 'step': 18761, 'epoch': 3} {'type': 'loss', 'content': 0.0586228184401989, 'timestamp': '2025-09-10 02:59:41.669823', 'step': 18762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:41.723384', 'step': 18762, 'epoch': 3} {'type': 'loss', 'content': 0.08188223093748093, 'timestamp': '2025-09-10 02:59:41.725526', 'step': 18763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:41.778789', 'step': 18763, 'epoch': 3} {'type': 'loss', 'content': 0.1280588060617447, 'timestamp': '2025-09-10 02:59:41.784840', 'step': 18764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:41.837862', 'step': 18764, 'epoch': 3} {'type': 'loss', 'content': 0.16382402181625366, 'timestamp': '2025-09-10 02:59:41.840022', 'step': 18765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:41.893065', 'step': 18765, 'epoch': 3} {'type': 'loss', 'content': 0.09761890769004822, 'timestamp': '2025-09-10 02:59:41.895221', 'step': 18766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:41.951050', 'step': 18766, 'epoch': 3} {'type': 'loss', 'content': 0.1326693743467331, 'timestamp': '2025-09-10 02:59:41.954875', 'step': 18767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:42.011669', 'step': 18767, 'epoch': 3} {'type': 'loss', 'content': 0.16701234877109528, 'timestamp': '2025-09-10 02:59:42.017834', 'step': 18768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:42.070988', 'step': 18768, 'epoch': 3} {'type': 'loss', 'content': 0.11972836405038834, 'timestamp': '2025-09-10 02:59:42.073176', 'step': 18769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:42.126194', 'step': 18769, 'epoch': 3} {'type': 'loss', 'content': 0.10247272998094559, 'timestamp': '2025-09-10 02:59:42.130289', 'step': 18770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:42.187124', 'step': 18770, 'epoch': 3} {'type': 'loss', 'content': 0.10418706387281418, 'timestamp': '2025-09-10 02:59:42.189410', 'step': 18771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:42.243013', 'step': 18771, 'epoch': 3} {'type': 'loss', 'content': 0.04929129406809807, 'timestamp': '2025-09-10 02:59:42.248991', 'step': 18772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:42.301604', 'step': 18772, 'epoch': 3} {'type': 'loss', 'content': 0.13274835050106049, 'timestamp': '2025-09-10 02:59:42.303841', 'step': 18773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:42.356910', 'step': 18773, 'epoch': 3} {'type': 'loss', 'content': 0.11938419938087463, 'timestamp': '2025-09-10 02:59:42.360206', 'step': 18774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:42.420516', 'step': 18774, 'epoch': 3} {'type': 'loss', 'content': 0.12342782318592072, 'timestamp': '2025-09-10 02:59:42.422611', 'step': 18775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:42.475185', 'step': 18775, 'epoch': 3} {'type': 'loss', 'content': 0.14074280858039856, 'timestamp': '2025-09-10 02:59:42.481398', 'step': 18776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:42.534946', 'step': 18776, 'epoch': 3} {'type': 'loss', 'content': 0.1250961422920227, 'timestamp': '2025-09-10 02:59:42.537715', 'step': 18777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:42.596981', 'step': 18777, 'epoch': 3} {'type': 'loss', 'content': 0.06392768025398254, 'timestamp': '2025-09-10 02:59:42.599180', 'step': 18778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:42.652716', 'step': 18778, 'epoch': 3} {'type': 'loss', 'content': 0.06579302996397018, 'timestamp': '2025-09-10 02:59:42.654953', 'step': 18779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:42.708312', 'step': 18779, 'epoch': 3} {'type': 'loss', 'content': 0.06562729924917221, 'timestamp': '2025-09-10 02:59:42.714310', 'step': 18780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:42.767249', 'step': 18780, 'epoch': 3} {'type': 'loss', 'content': 0.1457759439945221, 'timestamp': '2025-09-10 02:59:42.769417', 'step': 18781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:42.822270', 'step': 18781, 'epoch': 3} {'type': 'loss', 'content': 0.09096942096948624, 'timestamp': '2025-09-10 02:59:42.824442', 'step': 18782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:42.877530', 'step': 18782, 'epoch': 3} {'type': 'loss', 'content': 0.08940321207046509, 'timestamp': '2025-09-10 02:59:42.879669', 'step': 18783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:42.932497', 'step': 18783, 'epoch': 3} {'type': 'loss', 'content': 0.026948265731334686, 'timestamp': '2025-09-10 02:59:42.938368', 'step': 18784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:42.990960', 'step': 18784, 'epoch': 3} {'type': 'loss', 'content': 0.13539282977581024, 'timestamp': '2025-09-10 02:59:42.993124', 'step': 18785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:43.045933', 'step': 18785, 'epoch': 3} {'type': 'loss', 'content': 0.15468081831932068, 'timestamp': '2025-09-10 02:59:43.048189', 'step': 18786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:43.102407', 'step': 18786, 'epoch': 3} {'type': 'loss', 'content': 0.0486566387116909, 'timestamp': '2025-09-10 02:59:43.104603', 'step': 18787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:43.158152', 'step': 18787, 'epoch': 3} {'type': 'loss', 'content': 0.05369379371404648, 'timestamp': '2025-09-10 02:59:43.164611', 'step': 18788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:43.217664', 'step': 18788, 'epoch': 3} {'type': 'loss', 'content': 0.049546632915735245, 'timestamp': '2025-09-10 02:59:43.220117', 'step': 18789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:43.274341', 'step': 18789, 'epoch': 3} {'type': 'loss', 'content': 0.10886035114526749, 'timestamp': '2025-09-10 02:59:43.276467', 'step': 18790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:43.329202', 'step': 18790, 'epoch': 3} {'type': 'loss', 'content': 0.026338515803217888, 'timestamp': '2025-09-10 02:59:43.331319', 'step': 18791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:43.383694', 'step': 18791, 'epoch': 3} {'type': 'loss', 'content': 0.1698136329650879, 'timestamp': '2025-09-10 02:59:43.389459', 'step': 18792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:43.441811', 'step': 18792, 'epoch': 3} {'type': 'loss', 'content': 0.12088797986507416, 'timestamp': '2025-09-10 02:59:43.443993', 'step': 18793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:43.498766', 'step': 18793, 'epoch': 3} {'type': 'loss', 'content': 0.09298604726791382, 'timestamp': '2025-09-10 02:59:43.500909', 'step': 18794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:43.559949', 'step': 18794, 'epoch': 3} {'type': 'loss', 'content': 0.05423782020807266, 'timestamp': '2025-09-10 02:59:43.562108', 'step': 18795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:43.617085', 'step': 18795, 'epoch': 3} {'type': 'loss', 'content': 0.02813710831105709, 'timestamp': '2025-09-10 02:59:43.623144', 'step': 18796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:43.676153', 'step': 18796, 'epoch': 3} {'type': 'loss', 'content': 0.02726835384964943, 'timestamp': '2025-09-10 02:59:43.678517', 'step': 18797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:43.731457', 'step': 18797, 'epoch': 3} {'type': 'loss', 'content': 0.04964805766940117, 'timestamp': '2025-09-10 02:59:43.733624', 'step': 18798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:43.786781', 'step': 18798, 'epoch': 3} {'type': 'loss', 'content': 0.10734909027814865, 'timestamp': '2025-09-10 02:59:43.788978', 'step': 18799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:43.842375', 'step': 18799, 'epoch': 3} {'type': 'loss', 'content': 0.05314193293452263, 'timestamp': '2025-09-10 02:59:43.848212', 'step': 18800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:43.902205', 'step': 18800, 'epoch': 3} {'type': 'loss', 'content': 0.11626213788986206, 'timestamp': '2025-09-10 02:59:43.904317', 'step': 18801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:43.957790', 'step': 18801, 'epoch': 3} {'type': 'loss', 'content': 0.09911895543336868, 'timestamp': '2025-09-10 02:59:43.960196', 'step': 18802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:44.014227', 'step': 18802, 'epoch': 3} {'type': 'loss', 'content': 0.05770057439804077, 'timestamp': '2025-09-10 02:59:44.016521', 'step': 18803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:44.070617', 'step': 18803, 'epoch': 3} {'type': 'loss', 'content': 0.12527354061603546, 'timestamp': '2025-09-10 02:59:44.076435', 'step': 18804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:44.129271', 'step': 18804, 'epoch': 3} {'type': 'loss', 'content': 0.11496686190366745, 'timestamp': '2025-09-10 02:59:44.131522', 'step': 18805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:44.187021', 'step': 18805, 'epoch': 3} {'type': 'loss', 'content': 0.049991074949502945, 'timestamp': '2025-09-10 02:59:44.189349', 'step': 18806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:44.243242', 'step': 18806, 'epoch': 3} {'type': 'loss', 'content': 0.052713893353939056, 'timestamp': '2025-09-10 02:59:44.245469', 'step': 18807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:44.298848', 'step': 18807, 'epoch': 3} {'type': 'loss', 'content': 0.10013548284769058, 'timestamp': '2025-09-10 02:59:44.304882', 'step': 18808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:44.357802', 'step': 18808, 'epoch': 3} {'type': 'loss', 'content': 0.14559867978096008, 'timestamp': '2025-09-10 02:59:44.360373', 'step': 18809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:44.415832', 'step': 18809, 'epoch': 3} {'type': 'loss', 'content': 0.10379140824079514, 'timestamp': '2025-09-10 02:59:44.418066', 'step': 18810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:44.471844', 'step': 18810, 'epoch': 3} {'type': 'loss', 'content': 0.09466171264648438, 'timestamp': '2025-09-10 02:59:44.473913', 'step': 18811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:44.529424', 'step': 18811, 'epoch': 3} {'type': 'loss', 'content': 0.09686899185180664, 'timestamp': '2025-09-10 02:59:44.535530', 'step': 18812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:44.590293', 'step': 18812, 'epoch': 3} {'type': 'loss', 'content': 0.044699396938085556, 'timestamp': '2025-09-10 02:59:44.592659', 'step': 18813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:44.646360', 'step': 18813, 'epoch': 3} {'type': 'loss', 'content': 0.08297006040811539, 'timestamp': '2025-09-10 02:59:44.648668', 'step': 18814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:44.703845', 'step': 18814, 'epoch': 3} {'type': 'loss', 'content': 0.12587124109268188, 'timestamp': '2025-09-10 02:59:44.706161', 'step': 18815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:44.759912', 'step': 18815, 'epoch': 3} {'type': 'loss', 'content': 0.058880746364593506, 'timestamp': '2025-09-10 02:59:44.765974', 'step': 18816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:44.819094', 'step': 18816, 'epoch': 3} {'type': 'loss', 'content': 0.0719883069396019, 'timestamp': '2025-09-10 02:59:44.821513', 'step': 18817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:44.875528', 'step': 18817, 'epoch': 3} {'type': 'loss', 'content': 0.08005451411008835, 'timestamp': '2025-09-10 02:59:44.877906', 'step': 18818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:44.931272', 'step': 18818, 'epoch': 3} {'type': 'loss', 'content': 0.08168225735425949, 'timestamp': '2025-09-10 02:59:44.933499', 'step': 18819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:44.987123', 'step': 18819, 'epoch': 3} {'type': 'loss', 'content': 0.13314837217330933, 'timestamp': '2025-09-10 02:59:44.993031', 'step': 18820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:45.046007', 'step': 18820, 'epoch': 3} {'type': 'loss', 'content': 0.12552782893180847, 'timestamp': '2025-09-10 02:59:45.048149', 'step': 18821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:45.101666', 'step': 18821, 'epoch': 3} {'type': 'loss', 'content': 0.03683556616306305, 'timestamp': '2025-09-10 02:59:45.103796', 'step': 18822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:45.156762', 'step': 18822, 'epoch': 3} {'type': 'loss', 'content': 0.09035690873861313, 'timestamp': '2025-09-10 02:59:45.158998', 'step': 18823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:45.212221', 'step': 18823, 'epoch': 3} {'type': 'loss', 'content': 0.06019501015543938, 'timestamp': '2025-09-10 02:59:45.218186', 'step': 18824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:45.271934', 'step': 18824, 'epoch': 3} {'type': 'loss', 'content': 0.057687148451805115, 'timestamp': '2025-09-10 02:59:45.274059', 'step': 18825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:45.327209', 'step': 18825, 'epoch': 3} {'type': 'loss', 'content': 0.0955529510974884, 'timestamp': '2025-09-10 02:59:45.329427', 'step': 18826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:45.384017', 'step': 18826, 'epoch': 3} {'type': 'loss', 'content': 0.09230813384056091, 'timestamp': '2025-09-10 02:59:45.386195', 'step': 18827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:45.441197', 'step': 18827, 'epoch': 3} {'type': 'loss', 'content': 0.035465337336063385, 'timestamp': '2025-09-10 02:59:45.447022', 'step': 18828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:45.499802', 'step': 18828, 'epoch': 3} {'type': 'loss', 'content': 0.11705663800239563, 'timestamp': '2025-09-10 02:59:45.501928', 'step': 18829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:45.555242', 'step': 18829, 'epoch': 3} {'type': 'loss', 'content': 0.1117798462510109, 'timestamp': '2025-09-10 02:59:45.557372', 'step': 18830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:45.610512', 'step': 18830, 'epoch': 3} {'type': 'loss', 'content': 0.07653296738862991, 'timestamp': '2025-09-10 02:59:45.612744', 'step': 18831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:45.666361', 'step': 18831, 'epoch': 3} {'type': 'loss', 'content': 0.05842661112546921, 'timestamp': '2025-09-10 02:59:45.672219', 'step': 18832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:45.726720', 'step': 18832, 'epoch': 3} {'type': 'loss', 'content': 0.07984106987714767, 'timestamp': '2025-09-10 02:59:45.728947', 'step': 18833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:45.783334', 'step': 18833, 'epoch': 3} {'type': 'loss', 'content': 0.06446290761232376, 'timestamp': '2025-09-10 02:59:45.785497', 'step': 18834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:45.839313', 'step': 18834, 'epoch': 3} {'type': 'loss', 'content': 0.10581725835800171, 'timestamp': '2025-09-10 02:59:45.841540', 'step': 18835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:45.895032', 'step': 18835, 'epoch': 3} {'type': 'loss', 'content': 0.04696881026029587, 'timestamp': '2025-09-10 02:59:45.900925', 'step': 18836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:45.953961', 'step': 18836, 'epoch': 3} {'type': 'loss', 'content': 0.060621052980422974, 'timestamp': '2025-09-10 02:59:45.956084', 'step': 18837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:46.009228', 'step': 18837, 'epoch': 3} {'type': 'loss', 'content': 0.12121490389108658, 'timestamp': '2025-09-10 02:59:46.011476', 'step': 18838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:46.064568', 'step': 18838, 'epoch': 3} {'type': 'loss', 'content': 0.18088586628437042, 'timestamp': '2025-09-10 02:59:46.066757', 'step': 18839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:46.119676', 'step': 18839, 'epoch': 3} {'type': 'loss', 'content': 0.10370960086584091, 'timestamp': '2025-09-10 02:59:46.125524', 'step': 18840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:46.178160', 'step': 18840, 'epoch': 3} {'type': 'loss', 'content': 0.07216653227806091, 'timestamp': '2025-09-10 02:59:46.180720', 'step': 18841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:46.233674', 'step': 18841, 'epoch': 3} {'type': 'loss', 'content': 0.10156495869159698, 'timestamp': '2025-09-10 02:59:46.235837', 'step': 18842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:46.289683', 'step': 18842, 'epoch': 3} {'type': 'loss', 'content': 0.11330896615982056, 'timestamp': '2025-09-10 02:59:46.291859', 'step': 18843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:46.346776', 'step': 18843, 'epoch': 3} {'type': 'loss', 'content': 0.1116400957107544, 'timestamp': '2025-09-10 02:59:46.352827', 'step': 18844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:46.405954', 'step': 18844, 'epoch': 3} {'type': 'loss', 'content': 0.06648580729961395, 'timestamp': '2025-09-10 02:59:46.408245', 'step': 18845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:46.461286', 'step': 18845, 'epoch': 3} {'type': 'loss', 'content': 0.11529678106307983, 'timestamp': '2025-09-10 02:59:46.463546', 'step': 18846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:46.517258', 'step': 18846, 'epoch': 3} {'type': 'loss', 'content': 0.05383431911468506, 'timestamp': '2025-09-10 02:59:46.519449', 'step': 18847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:46.572323', 'step': 18847, 'epoch': 3} {'type': 'loss', 'content': 0.05501003563404083, 'timestamp': '2025-09-10 02:59:46.578235', 'step': 18848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:46.630901', 'step': 18848, 'epoch': 3} {'type': 'loss', 'content': 0.06775315850973129, 'timestamp': '2025-09-10 02:59:46.633099', 'step': 18849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:46.686406', 'step': 18849, 'epoch': 3} {'type': 'loss', 'content': 0.06904086470603943, 'timestamp': '2025-09-10 02:59:46.688530', 'step': 18850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:46.741881', 'step': 18850, 'epoch': 3} {'type': 'loss', 'content': 0.1381010115146637, 'timestamp': '2025-09-10 02:59:46.744127', 'step': 18851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:46.797697', 'step': 18851, 'epoch': 3} {'type': 'loss', 'content': 0.08895380795001984, 'timestamp': '2025-09-10 02:59:46.803690', 'step': 18852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:46.858643', 'step': 18852, 'epoch': 3} {'type': 'loss', 'content': 0.10194516181945801, 'timestamp': '2025-09-10 02:59:46.860797', 'step': 18853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:46.915020', 'step': 18853, 'epoch': 3} {'type': 'loss', 'content': 0.08822695910930634, 'timestamp': '2025-09-10 02:59:46.917179', 'step': 18854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:46.970829', 'step': 18854, 'epoch': 3} {'type': 'loss', 'content': 0.11031299829483032, 'timestamp': '2025-09-10 02:59:46.973071', 'step': 18855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:47.026293', 'step': 18855, 'epoch': 3} {'type': 'loss', 'content': 0.027891771867871284, 'timestamp': '2025-09-10 02:59:47.032352', 'step': 18856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:47.089050', 'step': 18856, 'epoch': 3} {'type': 'loss', 'content': 0.13951145112514496, 'timestamp': '2025-09-10 02:59:47.091238', 'step': 18857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:47.150328', 'step': 18857, 'epoch': 3} {'type': 'loss', 'content': 0.09967596083879471, 'timestamp': '2025-09-10 02:59:47.152498', 'step': 18858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:47.205833', 'step': 18858, 'epoch': 3} {'type': 'loss', 'content': 0.11401613801717758, 'timestamp': '2025-09-10 02:59:47.208082', 'step': 18859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:47.261194', 'step': 18859, 'epoch': 3} {'type': 'loss', 'content': 0.06810548901557922, 'timestamp': '2025-09-10 02:59:47.267411', 'step': 18860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:47.319971', 'step': 18860, 'epoch': 3} {'type': 'loss', 'content': 0.11307121068239212, 'timestamp': '2025-09-10 02:59:47.322232', 'step': 18861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:47.376005', 'step': 18861, 'epoch': 3} {'type': 'loss', 'content': 0.13184598088264465, 'timestamp': '2025-09-10 02:59:47.378187', 'step': 18862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:47.436208', 'step': 18862, 'epoch': 3} {'type': 'loss', 'content': 0.14055860042572021, 'timestamp': '2025-09-10 02:59:47.438336', 'step': 18863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:47.491819', 'step': 18863, 'epoch': 3} {'type': 'loss', 'content': 0.08961936831474304, 'timestamp': '2025-09-10 02:59:47.497859', 'step': 18864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:47.550649', 'step': 18864, 'epoch': 3} {'type': 'loss', 'content': 0.13253642618656158, 'timestamp': '2025-09-10 02:59:47.552778', 'step': 18865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:47.605976', 'step': 18865, 'epoch': 3} {'type': 'loss', 'content': 0.11524276435375214, 'timestamp': '2025-09-10 02:59:47.608055', 'step': 18866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:47.661808', 'step': 18866, 'epoch': 3} {'type': 'loss', 'content': 0.08968225866556168, 'timestamp': '2025-09-10 02:59:47.663896', 'step': 18867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:47.717048', 'step': 18867, 'epoch': 3} {'type': 'loss', 'content': 0.13889208436012268, 'timestamp': '2025-09-10 02:59:47.722894', 'step': 18868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:47.775667', 'step': 18868, 'epoch': 3} {'type': 'loss', 'content': 0.07484116405248642, 'timestamp': '2025-09-10 02:59:47.777853', 'step': 18869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:47.830748', 'step': 18869, 'epoch': 3} {'type': 'loss', 'content': 0.0685531422495842, 'timestamp': '2025-09-10 02:59:47.832899', 'step': 18870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:47.886519', 'step': 18870, 'epoch': 3} {'type': 'loss', 'content': 0.19147884845733643, 'timestamp': '2025-09-10 02:59:47.888642', 'step': 18871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:47.942931', 'step': 18871, 'epoch': 3} {'type': 'loss', 'content': 0.07163829356431961, 'timestamp': '2025-09-10 02:59:47.949325', 'step': 18872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:48.002865', 'step': 18872, 'epoch': 3} {'type': 'loss', 'content': 0.10464517027139664, 'timestamp': '2025-09-10 02:59:48.004960', 'step': 18873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:48.058799', 'step': 18873, 'epoch': 3} {'type': 'loss', 'content': 0.024578578770160675, 'timestamp': '2025-09-10 02:59:48.061122', 'step': 18874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:48.114031', 'step': 18874, 'epoch': 3} {'type': 'loss', 'content': 0.08198142051696777, 'timestamp': '2025-09-10 02:59:48.116330', 'step': 18875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:48.170257', 'step': 18875, 'epoch': 3} {'type': 'loss', 'content': 0.04160090163350105, 'timestamp': '2025-09-10 02:59:48.176180', 'step': 18876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:48.228628', 'step': 18876, 'epoch': 3} {'type': 'loss', 'content': 0.06508095562458038, 'timestamp': '2025-09-10 02:59:48.230732', 'step': 18877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:48.283893', 'step': 18877, 'epoch': 3} {'type': 'loss', 'content': 0.10994856804609299, 'timestamp': '2025-09-10 02:59:48.286177', 'step': 18878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:48.339157', 'step': 18878, 'epoch': 3} {'type': 'loss', 'content': 0.08679889887571335, 'timestamp': '2025-09-10 02:59:48.341303', 'step': 18879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:48.394968', 'step': 18879, 'epoch': 3} {'type': 'loss', 'content': 0.05094311013817787, 'timestamp': '2025-09-10 02:59:48.400747', 'step': 18880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:48.454096', 'step': 18880, 'epoch': 3} {'type': 'loss', 'content': 0.11708471179008484, 'timestamp': '2025-09-10 02:59:48.456305', 'step': 18881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:48.511151', 'step': 18881, 'epoch': 3} {'type': 'loss', 'content': 0.07422416657209396, 'timestamp': '2025-09-10 02:59:48.513434', 'step': 18882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:48.568302', 'step': 18882, 'epoch': 3} {'type': 'loss', 'content': 0.08754188567399979, 'timestamp': '2025-09-10 02:59:48.570466', 'step': 18883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:48.623532', 'step': 18883, 'epoch': 3} {'type': 'loss', 'content': 0.0809752494096756, 'timestamp': '2025-09-10 02:59:48.629431', 'step': 18884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:48.682431', 'step': 18884, 'epoch': 3} {'type': 'loss', 'content': 0.0516752228140831, 'timestamp': '2025-09-10 02:59:48.684541', 'step': 18885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:48.737221', 'step': 18885, 'epoch': 3} {'type': 'loss', 'content': 0.17493434250354767, 'timestamp': '2025-09-10 02:59:48.739322', 'step': 18886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:48.793114', 'step': 18886, 'epoch': 3} {'type': 'loss', 'content': 0.05835770443081856, 'timestamp': '2025-09-10 02:59:48.795330', 'step': 18887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:48.848359', 'step': 18887, 'epoch': 3} {'type': 'loss', 'content': 0.05833218991756439, 'timestamp': '2025-09-10 02:59:48.854295', 'step': 18888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:48.906939', 'step': 18888, 'epoch': 3} {'type': 'loss', 'content': 0.16192176938056946, 'timestamp': '2025-09-10 02:59:48.909179', 'step': 18889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:48.964509', 'step': 18889, 'epoch': 3} {'type': 'loss', 'content': 0.1465177834033966, 'timestamp': '2025-09-10 02:59:48.966685', 'step': 18890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:49.020901', 'step': 18890, 'epoch': 3} {'type': 'loss', 'content': 0.05570022389292717, 'timestamp': '2025-09-10 02:59:49.022991', 'step': 18891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:49.076431', 'step': 18891, 'epoch': 3} {'type': 'loss', 'content': 0.07605341821908951, 'timestamp': '2025-09-10 02:59:49.082357', 'step': 18892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:49.135233', 'step': 18892, 'epoch': 3} {'type': 'loss', 'content': 0.09832786023616791, 'timestamp': '2025-09-10 02:59:49.137286', 'step': 18893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:49.190599', 'step': 18893, 'epoch': 3} {'type': 'loss', 'content': 0.06656192243099213, 'timestamp': '2025-09-10 02:59:49.192764', 'step': 18894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:49.245445', 'step': 18894, 'epoch': 3} {'type': 'loss', 'content': 0.07123999297618866, 'timestamp': '2025-09-10 02:59:49.247601', 'step': 18895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:49.300630', 'step': 18895, 'epoch': 3} {'type': 'loss', 'content': 0.06204240396618843, 'timestamp': '2025-09-10 02:59:49.306546', 'step': 18896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:49.360685', 'step': 18896, 'epoch': 3} {'type': 'loss', 'content': 0.1175832599401474, 'timestamp': '2025-09-10 02:59:49.362915', 'step': 18897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:49.415788', 'step': 18897, 'epoch': 3} {'type': 'loss', 'content': 0.07047747075557709, 'timestamp': '2025-09-10 02:59:49.417879', 'step': 18898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:49.472091', 'step': 18898, 'epoch': 3} {'type': 'loss', 'content': 0.14756816625595093, 'timestamp': '2025-09-10 02:59:49.474299', 'step': 18899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:49.528307', 'step': 18899, 'epoch': 3} {'type': 'loss', 'content': 0.07582927495241165, 'timestamp': '2025-09-10 02:59:49.534358', 'step': 18900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:49.587187', 'step': 18900, 'epoch': 3} {'type': 'loss', 'content': 0.05292009189724922, 'timestamp': '2025-09-10 02:59:49.589302', 'step': 18901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:59:49.642053', 'step': 18901, 'epoch': 3} {'type': 'loss', 'content': 0.08315184712409973, 'timestamp': '2025-09-10 02:59:49.644184', 'step': 18902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:49.697728', 'step': 18902, 'epoch': 3} {'type': 'loss', 'content': 0.11339873820543289, 'timestamp': '2025-09-10 02:59:49.700074', 'step': 18903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:49.753259', 'step': 18903, 'epoch': 3} {'type': 'loss', 'content': 0.05590938776731491, 'timestamp': '2025-09-10 02:59:49.759349', 'step': 18904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:49.811841', 'step': 18904, 'epoch': 3} {'type': 'loss', 'content': 0.07148449122905731, 'timestamp': '2025-09-10 02:59:49.814126', 'step': 18905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:49.867009', 'step': 18905, 'epoch': 3} {'type': 'loss', 'content': 0.06458579748868942, 'timestamp': '2025-09-10 02:59:49.869045', 'step': 18906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:49.922189', 'step': 18906, 'epoch': 3} {'type': 'loss', 'content': 0.07223308831453323, 'timestamp': '2025-09-10 02:59:49.924296', 'step': 18907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:49.980434', 'step': 18907, 'epoch': 3} {'type': 'loss', 'content': 0.04491811245679855, 'timestamp': '2025-09-10 02:59:49.986054', 'step': 18908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:50.039186', 'step': 18908, 'epoch': 3} {'type': 'loss', 'content': 0.0747344121336937, 'timestamp': '2025-09-10 02:59:50.041058', 'step': 18909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:59:50.095490', 'step': 18909, 'epoch': 3} {'type': 'loss', 'content': 0.0725783184170723, 'timestamp': '2025-09-10 02:59:50.097466', 'step': 18910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:50.152408', 'step': 18910, 'epoch': 3} {'type': 'loss', 'content': 0.09360440820455551, 'timestamp': '2025-09-10 02:59:50.154490', 'step': 18911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:50.207292', 'step': 18911, 'epoch': 3} {'type': 'loss', 'content': 0.046462833881378174, 'timestamp': '2025-09-10 02:59:50.213271', 'step': 18912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:50.265727', 'step': 18912, 'epoch': 3} {'type': 'loss', 'content': 0.032952144742012024, 'timestamp': '2025-09-10 02:59:50.267650', 'step': 18913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:50.320026', 'step': 18913, 'epoch': 3} {'type': 'loss', 'content': 0.04104139283299446, 'timestamp': '2025-09-10 02:59:50.322321', 'step': 18914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:50.375747', 'step': 18914, 'epoch': 3} {'type': 'loss', 'content': 0.03410991653800011, 'timestamp': '2025-09-10 02:59:50.377870', 'step': 18915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:50.432413', 'step': 18915, 'epoch': 3} {'type': 'loss', 'content': 0.1109863743185997, 'timestamp': '2025-09-10 02:59:50.438242', 'step': 18916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:50.490527', 'step': 18916, 'epoch': 3} {'type': 'loss', 'content': 0.06481048464775085, 'timestamp': '2025-09-10 02:59:50.492804', 'step': 18917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:50.545266', 'step': 18917, 'epoch': 3} {'type': 'loss', 'content': 0.12288767844438553, 'timestamp': '2025-09-10 02:59:50.547593', 'step': 18918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:50.601879', 'step': 18918, 'epoch': 3} {'type': 'loss', 'content': 0.0983644649386406, 'timestamp': '2025-09-10 02:59:50.604026', 'step': 18919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:50.656827', 'step': 18919, 'epoch': 3} {'type': 'loss', 'content': 0.15942248702049255, 'timestamp': '2025-09-10 02:59:50.663068', 'step': 18920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:50.716061', 'step': 18920, 'epoch': 3} {'type': 'loss', 'content': 0.08795859664678574, 'timestamp': '2025-09-10 02:59:50.718476', 'step': 18921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:50.771695', 'step': 18921, 'epoch': 3} {'type': 'loss', 'content': 0.04695338383316994, 'timestamp': '2025-09-10 02:59:50.773797', 'step': 18922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:50.827364', 'step': 18922, 'epoch': 3} {'type': 'loss', 'content': 0.048250820487737656, 'timestamp': '2025-09-10 02:59:50.829591', 'step': 18923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:50.883121', 'step': 18923, 'epoch': 3} {'type': 'loss', 'content': 0.08112329244613647, 'timestamp': '2025-09-10 02:59:50.888974', 'step': 18924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:50.941320', 'step': 18924, 'epoch': 3} {'type': 'loss', 'content': 0.14666523039340973, 'timestamp': '2025-09-10 02:59:50.943428', 'step': 18925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:50.997686', 'step': 18925, 'epoch': 3} {'type': 'loss', 'content': 0.13669830560684204, 'timestamp': '2025-09-10 02:59:50.999827', 'step': 18926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:51.053093', 'step': 18926, 'epoch': 3} {'type': 'loss', 'content': 0.11825177818536758, 'timestamp': '2025-09-10 02:59:51.055134', 'step': 18927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:51.108743', 'step': 18927, 'epoch': 3} {'type': 'loss', 'content': 0.07623801380395889, 'timestamp': '2025-09-10 02:59:51.114792', 'step': 18928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:51.169049', 'step': 18928, 'epoch': 3} {'type': 'loss', 'content': 0.1747276484966278, 'timestamp': '2025-09-10 02:59:51.171028', 'step': 18929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:51.223731', 'step': 18929, 'epoch': 3} {'type': 'loss', 'content': 0.0812414288520813, 'timestamp': '2025-09-10 02:59:51.225918', 'step': 18930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:51.279241', 'step': 18930, 'epoch': 3} {'type': 'loss', 'content': 0.05628769472241402, 'timestamp': '2025-09-10 02:59:51.281537', 'step': 18931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:51.334385', 'step': 18931, 'epoch': 3} {'type': 'loss', 'content': 0.06565386801958084, 'timestamp': '2025-09-10 02:59:51.340503', 'step': 18932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:51.394298', 'step': 18932, 'epoch': 3} {'type': 'loss', 'content': 0.18757957220077515, 'timestamp': '2025-09-10 02:59:51.396395', 'step': 18933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:51.448819', 'step': 18933, 'epoch': 3} {'type': 'loss', 'content': 0.06725817918777466, 'timestamp': '2025-09-10 02:59:51.451034', 'step': 18934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:51.504133', 'step': 18934, 'epoch': 3} {'type': 'loss', 'content': 0.09987621754407883, 'timestamp': '2025-09-10 02:59:51.506275', 'step': 18935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:51.558931', 'step': 18935, 'epoch': 3} {'type': 'loss', 'content': 0.09358947724103928, 'timestamp': '2025-09-10 02:59:51.564792', 'step': 18936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:51.617602', 'step': 18936, 'epoch': 3} {'type': 'loss', 'content': 0.04950876161456108, 'timestamp': '2025-09-10 02:59:51.619814', 'step': 18937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:51.673897', 'step': 18937, 'epoch': 3} {'type': 'loss', 'content': 0.13628782331943512, 'timestamp': '2025-09-10 02:59:51.676594', 'step': 18938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:51.730394', 'step': 18938, 'epoch': 3} {'type': 'loss', 'content': 0.11241237074136734, 'timestamp': '2025-09-10 02:59:51.732663', 'step': 18939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:51.786030', 'step': 18939, 'epoch': 3} {'type': 'loss', 'content': 0.02213568054139614, 'timestamp': '2025-09-10 02:59:51.791973', 'step': 18940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:51.844152', 'step': 18940, 'epoch': 3} {'type': 'loss', 'content': 0.09005841612815857, 'timestamp': '2025-09-10 02:59:51.846407', 'step': 18941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:51.900409', 'step': 18941, 'epoch': 3} {'type': 'loss', 'content': 0.24332357943058014, 'timestamp': '2025-09-10 02:59:51.902531', 'step': 18942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:51.955669', 'step': 18942, 'epoch': 3} {'type': 'loss', 'content': 0.09001410752534866, 'timestamp': '2025-09-10 02:59:51.957783', 'step': 18943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:52.011212', 'step': 18943, 'epoch': 3} {'type': 'loss', 'content': 0.11486639827489853, 'timestamp': '2025-09-10 02:59:52.016976', 'step': 18944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:52.069414', 'step': 18944, 'epoch': 3} {'type': 'loss', 'content': 0.06267368048429489, 'timestamp': '2025-09-10 02:59:52.071550', 'step': 18945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:52.126120', 'step': 18945, 'epoch': 3} {'type': 'loss', 'content': 0.07280915975570679, 'timestamp': '2025-09-10 02:59:52.128418', 'step': 18946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:52.183373', 'step': 18946, 'epoch': 3} {'type': 'loss', 'content': 0.03629525005817413, 'timestamp': '2025-09-10 02:59:52.185525', 'step': 18947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:52.238929', 'step': 18947, 'epoch': 3} {'type': 'loss', 'content': 0.08154822140932083, 'timestamp': '2025-09-10 02:59:52.244861', 'step': 18948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:52.297740', 'step': 18948, 'epoch': 3} {'type': 'loss', 'content': 0.11161770671606064, 'timestamp': '2025-09-10 02:59:52.299771', 'step': 18949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:52.352366', 'step': 18949, 'epoch': 3} {'type': 'loss', 'content': 0.14076262712478638, 'timestamp': '2025-09-10 02:59:52.354368', 'step': 18950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:52.408531', 'step': 18950, 'epoch': 3} {'type': 'loss', 'content': 0.05204097926616669, 'timestamp': '2025-09-10 02:59:52.410639', 'step': 18951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:52.463130', 'step': 18951, 'epoch': 3} {'type': 'loss', 'content': 0.11093245446681976, 'timestamp': '2025-09-10 02:59:52.468997', 'step': 18952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:52.521858', 'step': 18952, 'epoch': 3} {'type': 'loss', 'content': 0.11349879205226898, 'timestamp': '2025-09-10 02:59:52.524005', 'step': 18953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:52.576619', 'step': 18953, 'epoch': 3} {'type': 'loss', 'content': 0.12166004627943039, 'timestamp': '2025-09-10 02:59:52.578728', 'step': 18954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:52.631601', 'step': 18954, 'epoch': 3} {'type': 'loss', 'content': 0.08511122316122055, 'timestamp': '2025-09-10 02:59:52.633792', 'step': 18955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:52.686323', 'step': 18955, 'epoch': 3} {'type': 'loss', 'content': 0.10902899503707886, 'timestamp': '2025-09-10 02:59:52.692228', 'step': 18956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:52.745925', 'step': 18956, 'epoch': 3} {'type': 'loss', 'content': 0.08965844660997391, 'timestamp': '2025-09-10 02:59:52.748043', 'step': 18957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:52.801856', 'step': 18957, 'epoch': 3} {'type': 'loss', 'content': 0.0634138435125351, 'timestamp': '2025-09-10 02:59:52.803986', 'step': 18958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:52.856776', 'step': 18958, 'epoch': 3} {'type': 'loss', 'content': 0.10096431523561478, 'timestamp': '2025-09-10 02:59:52.858994', 'step': 18959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:52.912674', 'step': 18959, 'epoch': 3} {'type': 'loss', 'content': 0.04305622726678848, 'timestamp': '2025-09-10 02:59:52.918587', 'step': 18960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:52.971031', 'step': 18960, 'epoch': 3} {'type': 'loss', 'content': 0.10341072827577591, 'timestamp': '2025-09-10 02:59:52.973271', 'step': 18961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:53.027401', 'step': 18961, 'epoch': 3} {'type': 'loss', 'content': 0.07722827047109604, 'timestamp': '2025-09-10 02:59:53.029481', 'step': 18962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:53.082017', 'step': 18962, 'epoch': 3} {'type': 'loss', 'content': 0.10384315252304077, 'timestamp': '2025-09-10 02:59:53.084139', 'step': 18963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:53.136664', 'step': 18963, 'epoch': 3} {'type': 'loss', 'content': 0.07161323726177216, 'timestamp': '2025-09-10 02:59:53.142371', 'step': 18964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:53.194716', 'step': 18964, 'epoch': 3} {'type': 'loss', 'content': 0.07990710437297821, 'timestamp': '2025-09-10 02:59:53.196853', 'step': 18965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:53.250146', 'step': 18965, 'epoch': 3} {'type': 'loss', 'content': 0.05096527189016342, 'timestamp': '2025-09-10 02:59:53.252199', 'step': 18966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:53.306217', 'step': 18966, 'epoch': 3} {'type': 'loss', 'content': 0.06319965422153473, 'timestamp': '2025-09-10 02:59:53.308344', 'step': 18967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:53.361282', 'step': 18967, 'epoch': 3} {'type': 'loss', 'content': 0.103169746696949, 'timestamp': '2025-09-10 02:59:53.367259', 'step': 18968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:53.421010', 'step': 18968, 'epoch': 3} {'type': 'loss', 'content': 0.06764672696590424, 'timestamp': '2025-09-10 02:59:53.423121', 'step': 18969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:53.475848', 'step': 18969, 'epoch': 3} {'type': 'loss', 'content': 0.1436186283826828, 'timestamp': '2025-09-10 02:59:53.477858', 'step': 18970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:53.530674', 'step': 18970, 'epoch': 3} {'type': 'loss', 'content': 0.05503769963979721, 'timestamp': '2025-09-10 02:59:53.532740', 'step': 18971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:53.587280', 'step': 18971, 'epoch': 3} {'type': 'loss', 'content': 0.1284724771976471, 'timestamp': '2025-09-10 02:59:53.592893', 'step': 18972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:53.645209', 'step': 18972, 'epoch': 3} {'type': 'loss', 'content': 0.04054201766848564, 'timestamp': '2025-09-10 02:59:53.647423', 'step': 18973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:53.700101', 'step': 18973, 'epoch': 3} {'type': 'loss', 'content': 0.12276250123977661, 'timestamp': '2025-09-10 02:59:53.702428', 'step': 18974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:53.754841', 'step': 18974, 'epoch': 3} {'type': 'loss', 'content': 0.09543932229280472, 'timestamp': '2025-09-10 02:59:53.757190', 'step': 18975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:53.811390', 'step': 18975, 'epoch': 3} {'type': 'loss', 'content': 0.04554639756679535, 'timestamp': '2025-09-10 02:59:53.817389', 'step': 18976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:53.870528', 'step': 18976, 'epoch': 3} {'type': 'loss', 'content': 0.0974912941455841, 'timestamp': '2025-09-10 02:59:53.872725', 'step': 18977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:53.926483', 'step': 18977, 'epoch': 3} {'type': 'loss', 'content': 0.13527467846870422, 'timestamp': '2025-09-10 02:59:53.928590', 'step': 18978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:53.981950', 'step': 18978, 'epoch': 3} {'type': 'loss', 'content': 0.12052280455827713, 'timestamp': '2025-09-10 02:59:53.984057', 'step': 18979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:54.042047', 'step': 18979, 'epoch': 3} {'type': 'loss', 'content': 0.14242441952228546, 'timestamp': '2025-09-10 02:59:54.047839', 'step': 18980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:54.100441', 'step': 18980, 'epoch': 3} {'type': 'loss', 'content': 0.14572176337242126, 'timestamp': '2025-09-10 02:59:54.102563', 'step': 18981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:54.154879', 'step': 18981, 'epoch': 3} {'type': 'loss', 'content': 0.11897526681423187, 'timestamp': '2025-09-10 02:59:54.157025', 'step': 18982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:54.210282', 'step': 18982, 'epoch': 3} {'type': 'loss', 'content': 0.14231739938259125, 'timestamp': '2025-09-10 02:59:54.212351', 'step': 18983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:54.265006', 'step': 18983, 'epoch': 3} {'type': 'loss', 'content': 0.14156046509742737, 'timestamp': '2025-09-10 02:59:54.270755', 'step': 18984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:54.323818', 'step': 18984, 'epoch': 3} {'type': 'loss', 'content': 0.0817507728934288, 'timestamp': '2025-09-10 02:59:54.325901', 'step': 18985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:54.379720', 'step': 18985, 'epoch': 3} {'type': 'loss', 'content': 0.13322192430496216, 'timestamp': '2025-09-10 02:59:54.381981', 'step': 18986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:54.435990', 'step': 18986, 'epoch': 3} {'type': 'loss', 'content': 0.08693502843379974, 'timestamp': '2025-09-10 02:59:54.438115', 'step': 18987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:54.491058', 'step': 18987, 'epoch': 3} {'type': 'loss', 'content': 0.0432809554040432, 'timestamp': '2025-09-10 02:59:54.496900', 'step': 18988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:54.550326', 'step': 18988, 'epoch': 3} {'type': 'loss', 'content': 0.11466570198535919, 'timestamp': '2025-09-10 02:59:54.552656', 'step': 18989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:54.606033', 'step': 18989, 'epoch': 3} {'type': 'loss', 'content': 0.06815269589424133, 'timestamp': '2025-09-10 02:59:54.608449', 'step': 18990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:54.663397', 'step': 18990, 'epoch': 3} {'type': 'loss', 'content': 0.10845381021499634, 'timestamp': '2025-09-10 02:59:54.665613', 'step': 18991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:54.718585', 'step': 18991, 'epoch': 3} {'type': 'loss', 'content': 0.09472556412220001, 'timestamp': '2025-09-10 02:59:54.724469', 'step': 18992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:54.776587', 'step': 18992, 'epoch': 3} {'type': 'loss', 'content': 0.11846109479665756, 'timestamp': '2025-09-10 02:59:54.778777', 'step': 18993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:54.831852', 'step': 18993, 'epoch': 3} {'type': 'loss', 'content': 0.0421549417078495, 'timestamp': '2025-09-10 02:59:54.833990', 'step': 18994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:54.887602', 'step': 18994, 'epoch': 3} {'type': 'loss', 'content': 0.10231263935565948, 'timestamp': '2025-09-10 02:59:54.889822', 'step': 18995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:54.944065', 'step': 18995, 'epoch': 3} {'type': 'loss', 'content': 0.11454421281814575, 'timestamp': '2025-09-10 02:59:54.949991', 'step': 18996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:55.002731', 'step': 18996, 'epoch': 3} {'type': 'loss', 'content': 0.11351525038480759, 'timestamp': '2025-09-10 02:59:55.004929', 'step': 18997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:55.057794', 'step': 18997, 'epoch': 3} {'type': 'loss', 'content': 0.07987714558839798, 'timestamp': '2025-09-10 02:59:55.059904', 'step': 18998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:55.113761', 'step': 18998, 'epoch': 3} {'type': 'loss', 'content': 0.03625873476266861, 'timestamp': '2025-09-10 02:59:55.115912', 'step': 18999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:55.169223', 'step': 18999, 'epoch': 3} {'type': 'loss', 'content': 0.1476791650056839, 'timestamp': '2025-09-10 02:59:55.174847', 'step': 19000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 19000', 'timestamp': '2025-09-10 02:59:55.621733', 'step': 19000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:55.681502', 'step': 19000, 'epoch': 3} {'type': 'loss', 'content': 0.04762279614806175, 'timestamp': '2025-09-10 02:59:55.684803', 'step': 19001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:55.740165', 'step': 19001, 'epoch': 3} {'type': 'loss', 'content': 0.13036035001277924, 'timestamp': '2025-09-10 02:59:55.742409', 'step': 19002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:55.796268', 'step': 19002, 'epoch': 3} {'type': 'loss', 'content': 0.04780355095863342, 'timestamp': '2025-09-10 02:59:55.798580', 'step': 19003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:55.853594', 'step': 19003, 'epoch': 3} {'type': 'loss', 'content': 0.09381626546382904, 'timestamp': '2025-09-10 02:59:55.859879', 'step': 19004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:55.913013', 'step': 19004, 'epoch': 3} {'type': 'loss', 'content': 0.11694839596748352, 'timestamp': '2025-09-10 02:59:55.915101', 'step': 19005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:55.969298', 'step': 19005, 'epoch': 3} {'type': 'loss', 'content': 0.06771707534790039, 'timestamp': '2025-09-10 02:59:55.971560', 'step': 19006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:56.025888', 'step': 19006, 'epoch': 3} {'type': 'loss', 'content': 0.12426584213972092, 'timestamp': '2025-09-10 02:59:56.028052', 'step': 19007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:56.082965', 'step': 19007, 'epoch': 3} {'type': 'loss', 'content': 0.1033756360411644, 'timestamp': '2025-09-10 02:59:56.089473', 'step': 19008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:56.144128', 'step': 19008, 'epoch': 3} {'type': 'loss', 'content': 0.08328094333410263, 'timestamp': '2025-09-10 02:59:56.146256', 'step': 19009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:56.200032', 'step': 19009, 'epoch': 3} {'type': 'loss', 'content': 0.03657841682434082, 'timestamp': '2025-09-10 02:59:56.201963', 'step': 19010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:56.257390', 'step': 19010, 'epoch': 3} {'type': 'loss', 'content': 0.022997748106718063, 'timestamp': '2025-09-10 02:59:56.259476', 'step': 19011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:56.312859', 'step': 19011, 'epoch': 3} {'type': 'loss', 'content': 0.038873787969350815, 'timestamp': '2025-09-10 02:59:56.318996', 'step': 19012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:56.371260', 'step': 19012, 'epoch': 3} {'type': 'loss', 'content': 0.08481671661138535, 'timestamp': '2025-09-10 02:59:56.373490', 'step': 19013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:56.427137', 'step': 19013, 'epoch': 3} {'type': 'loss', 'content': 0.07704488933086395, 'timestamp': '2025-09-10 02:59:56.429326', 'step': 19014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:56.482715', 'step': 19014, 'epoch': 3} {'type': 'loss', 'content': 0.05126044154167175, 'timestamp': '2025-09-10 02:59:56.484932', 'step': 19015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:56.538623', 'step': 19015, 'epoch': 3} {'type': 'loss', 'content': 0.1364051103591919, 'timestamp': '2025-09-10 02:59:56.544535', 'step': 19016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:56.598563', 'step': 19016, 'epoch': 3} {'type': 'loss', 'content': 0.053420111536979675, 'timestamp': '2025-09-10 02:59:56.600715', 'step': 19017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:56.654379', 'step': 19017, 'epoch': 3} {'type': 'loss', 'content': 0.08829143643379211, 'timestamp': '2025-09-10 02:59:56.656652', 'step': 19018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:56.710834', 'step': 19018, 'epoch': 3} {'type': 'loss', 'content': 0.14010024070739746, 'timestamp': '2025-09-10 02:59:56.712910', 'step': 19019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:56.766783', 'step': 19019, 'epoch': 3} {'type': 'loss', 'content': 0.0359242744743824, 'timestamp': '2025-09-10 02:59:56.772653', 'step': 19020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:56.825119', 'step': 19020, 'epoch': 3} {'type': 'loss', 'content': 0.09409037232398987, 'timestamp': '2025-09-10 02:59:56.827273', 'step': 19021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:56.881415', 'step': 19021, 'epoch': 3} {'type': 'loss', 'content': 0.0760439783334732, 'timestamp': '2025-09-10 02:59:56.883489', 'step': 19022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:56.936883', 'step': 19022, 'epoch': 3} {'type': 'loss', 'content': 0.09303984045982361, 'timestamp': '2025-09-10 02:59:56.938986', 'step': 19023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:56.992221', 'step': 19023, 'epoch': 3} {'type': 'loss', 'content': 0.050688277930021286, 'timestamp': '2025-09-10 02:59:56.998119', 'step': 19024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:57.050742', 'step': 19024, 'epoch': 3} {'type': 'loss', 'content': 0.06083769351243973, 'timestamp': '2025-09-10 02:59:57.052599', 'step': 19025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:57.105570', 'step': 19025, 'epoch': 3} {'type': 'loss', 'content': 0.11992885917425156, 'timestamp': '2025-09-10 02:59:57.107502', 'step': 19026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:57.160114', 'step': 19026, 'epoch': 3} {'type': 'loss', 'content': 0.11938519030809402, 'timestamp': '2025-09-10 02:59:57.162363', 'step': 19027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:57.217045', 'step': 19027, 'epoch': 3} {'type': 'loss', 'content': 0.07361605018377304, 'timestamp': '2025-09-10 02:59:57.223211', 'step': 19028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:57.279861', 'step': 19028, 'epoch': 3} {'type': 'loss', 'content': 0.07500039041042328, 'timestamp': '2025-09-10 02:59:57.281975', 'step': 19029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:57.334929', 'step': 19029, 'epoch': 3} {'type': 'loss', 'content': 0.08107436448335648, 'timestamp': '2025-09-10 02:59:57.337133', 'step': 19030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:57.390588', 'step': 19030, 'epoch': 3} {'type': 'loss', 'content': 0.19006940722465515, 'timestamp': '2025-09-10 02:59:57.392879', 'step': 19031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:57.446976', 'step': 19031, 'epoch': 3} {'type': 'loss', 'content': 0.07570026814937592, 'timestamp': '2025-09-10 02:59:57.453199', 'step': 19032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:57.507208', 'step': 19032, 'epoch': 3} {'type': 'loss', 'content': 0.10274414718151093, 'timestamp': '2025-09-10 02:59:57.509407', 'step': 19033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:57.562909', 'step': 19033, 'epoch': 3} {'type': 'loss', 'content': 0.06292565166950226, 'timestamp': '2025-09-10 02:59:57.565150', 'step': 19034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:57.619048', 'step': 19034, 'epoch': 3} {'type': 'loss', 'content': 0.10478533059358597, 'timestamp': '2025-09-10 02:59:57.621261', 'step': 19035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:57.675303', 'step': 19035, 'epoch': 3} {'type': 'loss', 'content': 0.02116837538778782, 'timestamp': '2025-09-10 02:59:57.685047', 'step': 19036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:57.738900', 'step': 19036, 'epoch': 3} {'type': 'loss', 'content': 0.11935543268918991, 'timestamp': '2025-09-10 02:59:57.743116', 'step': 19037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:57.797987', 'step': 19037, 'epoch': 3} {'type': 'loss', 'content': 0.054855454713106155, 'timestamp': '2025-09-10 02:59:57.800179', 'step': 19038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:57.853519', 'step': 19038, 'epoch': 3} {'type': 'loss', 'content': 0.09820403903722763, 'timestamp': '2025-09-10 02:59:57.855697', 'step': 19039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:57.910968', 'step': 19039, 'epoch': 3} {'type': 'loss', 'content': 0.04107658192515373, 'timestamp': '2025-09-10 02:59:57.919275', 'step': 19040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:57.974729', 'step': 19040, 'epoch': 3} {'type': 'loss', 'content': 0.13148106634616852, 'timestamp': '2025-09-10 02:59:57.976817', 'step': 19041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:58.030240', 'step': 19041, 'epoch': 3} {'type': 'loss', 'content': 0.06018814444541931, 'timestamp': '2025-09-10 02:59:58.033765', 'step': 19042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:58.089666', 'step': 19042, 'epoch': 3} {'type': 'loss', 'content': 0.0869116485118866, 'timestamp': '2025-09-10 02:59:58.091793', 'step': 19043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:58.145255', 'step': 19043, 'epoch': 3} {'type': 'loss', 'content': 0.07738381624221802, 'timestamp': '2025-09-10 02:59:58.151239', 'step': 19044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:58.205962', 'step': 19044, 'epoch': 3} {'type': 'loss', 'content': 0.12020746618509293, 'timestamp': '2025-09-10 02:59:58.208753', 'step': 19045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:58.266334', 'step': 19045, 'epoch': 3} {'type': 'loss', 'content': 0.07740677148103714, 'timestamp': '2025-09-10 02:59:58.270356', 'step': 19046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:58.325184', 'step': 19046, 'epoch': 3} {'type': 'loss', 'content': 0.09755473583936691, 'timestamp': '2025-09-10 02:59:58.327526', 'step': 19047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:58.380989', 'step': 19047, 'epoch': 3} {'type': 'loss', 'content': 0.05106506124138832, 'timestamp': '2025-09-10 02:59:58.387097', 'step': 19048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:58.444023', 'step': 19048, 'epoch': 3} {'type': 'loss', 'content': 0.03902651369571686, 'timestamp': '2025-09-10 02:59:58.446218', 'step': 19049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:58.502726', 'step': 19049, 'epoch': 3} {'type': 'loss', 'content': 0.1358306109905243, 'timestamp': '2025-09-10 02:59:58.504921', 'step': 19050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:58.560335', 'step': 19050, 'epoch': 3} {'type': 'loss', 'content': 0.07506074756383896, 'timestamp': '2025-09-10 02:59:58.563293', 'step': 19051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:58.616941', 'step': 19051, 'epoch': 3} {'type': 'loss', 'content': 0.0742688700556755, 'timestamp': '2025-09-10 02:59:58.622733', 'step': 19052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:58.675626', 'step': 19052, 'epoch': 3} {'type': 'loss', 'content': 0.1255130022764206, 'timestamp': '2025-09-10 02:59:58.677766', 'step': 19053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:58.733748', 'step': 19053, 'epoch': 3} {'type': 'loss', 'content': 0.12749259173870087, 'timestamp': '2025-09-10 02:59:58.735927', 'step': 19054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:58.791189', 'step': 19054, 'epoch': 3} {'type': 'loss', 'content': 0.06181655079126358, 'timestamp': '2025-09-10 02:59:58.793410', 'step': 19055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:58.848630', 'step': 19055, 'epoch': 3} {'type': 'loss', 'content': 0.13524466753005981, 'timestamp': '2025-09-10 02:59:58.860510', 'step': 19056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:58.925827', 'step': 19056, 'epoch': 3} {'type': 'loss', 'content': 0.06548760831356049, 'timestamp': '2025-09-10 02:59:58.927969', 'step': 19057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:58.981718', 'step': 19057, 'epoch': 3} {'type': 'loss', 'content': 0.1368803083896637, 'timestamp': '2025-09-10 02:59:58.984020', 'step': 19058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:59.037449', 'step': 19058, 'epoch': 3} {'type': 'loss', 'content': 0.11780290305614471, 'timestamp': '2025-09-10 02:59:59.039657', 'step': 19059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:59.093906', 'step': 19059, 'epoch': 3} {'type': 'loss', 'content': 0.03885609656572342, 'timestamp': '2025-09-10 02:59:59.104430', 'step': 19060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:59.166257', 'step': 19060, 'epoch': 3} {'type': 'loss', 'content': 0.07300835102796555, 'timestamp': '2025-09-10 02:59:59.168589', 'step': 19061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:59.221439', 'step': 19061, 'epoch': 3} {'type': 'loss', 'content': 0.03325367346405983, 'timestamp': '2025-09-10 02:59:59.223595', 'step': 19062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:59.278021', 'step': 19062, 'epoch': 3} {'type': 'loss', 'content': 0.07801056653261185, 'timestamp': '2025-09-10 02:59:59.280412', 'step': 19063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:59.335213', 'step': 19063, 'epoch': 3} {'type': 'loss', 'content': 0.06344310939311981, 'timestamp': '2025-09-10 02:59:59.341455', 'step': 19064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:59.394664', 'step': 19064, 'epoch': 3} {'type': 'loss', 'content': 0.09833049774169922, 'timestamp': '2025-09-10 02:59:59.396760', 'step': 19065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:59.454172', 'step': 19065, 'epoch': 3} {'type': 'loss', 'content': 0.10094930231571198, 'timestamp': '2025-09-10 02:59:59.456177', 'step': 19066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:59:59.520143', 'step': 19066, 'epoch': 3} {'type': 'loss', 'content': 0.03026614524424076, 'timestamp': '2025-09-10 02:59:59.522302', 'step': 19067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 02:59:59.580972', 'step': 19067, 'epoch': 3} {'type': 'loss', 'content': 0.06215459853410721, 'timestamp': '2025-09-10 02:59:59.586922', 'step': 19068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 02:59:59.640410', 'step': 19068, 'epoch': 3} {'type': 'loss', 'content': 0.07305390387773514, 'timestamp': '2025-09-10 02:59:59.642716', 'step': 19069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:59:59.696173', 'step': 19069, 'epoch': 3} {'type': 'loss', 'content': 0.08456466346979141, 'timestamp': '2025-09-10 02:59:59.698454', 'step': 19070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:59.752856', 'step': 19070, 'epoch': 3} {'type': 'loss', 'content': 0.10259858518838882, 'timestamp': '2025-09-10 02:59:59.755606', 'step': 19071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 02:59:59.816138', 'step': 19071, 'epoch': 3} {'type': 'loss', 'content': 0.034655384719371796, 'timestamp': '2025-09-10 02:59:59.822084', 'step': 19072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:59.879924', 'step': 19072, 'epoch': 3} {'type': 'loss', 'content': 0.09724485874176025, 'timestamp': '2025-09-10 02:59:59.882495', 'step': 19073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:59:59.944759', 'step': 19073, 'epoch': 3} {'type': 'loss', 'content': 0.1031939908862114, 'timestamp': '2025-09-10 02:59:59.946924', 'step': 19074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:00.002604', 'step': 19074, 'epoch': 3} {'type': 'loss', 'content': 0.03872516378760338, 'timestamp': '2025-09-10 03:00:00.004921', 'step': 19075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:00.058402', 'step': 19075, 'epoch': 3} {'type': 'loss', 'content': 0.03570476174354553, 'timestamp': '2025-09-10 03:00:00.067971', 'step': 19076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:00.121800', 'step': 19076, 'epoch': 3} {'type': 'loss', 'content': 0.1400775909423828, 'timestamp': '2025-09-10 03:00:00.126472', 'step': 19077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:00:00.185417', 'step': 19077, 'epoch': 3} {'type': 'loss', 'content': 0.08381133526563644, 'timestamp': '2025-09-10 03:00:00.189380', 'step': 19078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:00.244207', 'step': 19078, 'epoch': 3} {'type': 'loss', 'content': 0.06757021695375443, 'timestamp': '2025-09-10 03:00:00.246889', 'step': 19079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:00.300603', 'step': 19079, 'epoch': 3} {'type': 'loss', 'content': 0.12984852492809296, 'timestamp': '2025-09-10 03:00:00.307172', 'step': 19080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:00.365160', 'step': 19080, 'epoch': 3} {'type': 'loss', 'content': 0.050931043922901154, 'timestamp': '2025-09-10 03:00:00.369458', 'step': 19081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:00.423889', 'step': 19081, 'epoch': 3} {'type': 'loss', 'content': 0.11356016993522644, 'timestamp': '2025-09-10 03:00:00.426008', 'step': 19082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:00.479511', 'step': 19082, 'epoch': 3} {'type': 'loss', 'content': 0.0898052230477333, 'timestamp': '2025-09-10 03:00:00.482710', 'step': 19083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:00:00.536398', 'step': 19083, 'epoch': 3} {'type': 'loss', 'content': 0.038911908864974976, 'timestamp': '2025-09-10 03:00:00.542516', 'step': 19084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:00.602398', 'step': 19084, 'epoch': 3} {'type': 'loss', 'content': 0.09923022985458374, 'timestamp': '2025-09-10 03:00:00.604769', 'step': 19085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:00.657874', 'step': 19085, 'epoch': 3} {'type': 'loss', 'content': 0.0836833193898201, 'timestamp': '2025-09-10 03:00:00.659989', 'step': 19086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:00.714517', 'step': 19086, 'epoch': 3} {'type': 'loss', 'content': 0.07418745756149292, 'timestamp': '2025-09-10 03:00:00.716564', 'step': 19087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:00.770054', 'step': 19087, 'epoch': 3} {'type': 'loss', 'content': 0.019325999543070793, 'timestamp': '2025-09-10 03:00:00.776855', 'step': 19088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:00:00.837086', 'step': 19088, 'epoch': 3} {'type': 'loss', 'content': 0.041116759181022644, 'timestamp': '2025-09-10 03:00:00.839381', 'step': 19089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:00.892626', 'step': 19089, 'epoch': 3} {'type': 'loss', 'content': 0.0609760582447052, 'timestamp': '2025-09-10 03:00:00.896430', 'step': 19090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:00.955539', 'step': 19090, 'epoch': 3} {'type': 'loss', 'content': 0.02260158210992813, 'timestamp': '2025-09-10 03:00:00.957679', 'step': 19091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:01.022389', 'step': 19091, 'epoch': 3} {'type': 'loss', 'content': 0.06168795004487038, 'timestamp': '2025-09-10 03:00:01.028408', 'step': 19092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:01.081932', 'step': 19092, 'epoch': 3} {'type': 'loss', 'content': 0.09552110731601715, 'timestamp': '2025-09-10 03:00:01.087627', 'step': 19093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:01.148084', 'step': 19093, 'epoch': 3} {'type': 'loss', 'content': 0.020419657230377197, 'timestamp': '2025-09-10 03:00:01.150112', 'step': 19094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:01.206087', 'step': 19094, 'epoch': 3} {'type': 'loss', 'content': 0.05809586122632027, 'timestamp': '2025-09-10 03:00:01.208400', 'step': 19095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:01.262102', 'step': 19095, 'epoch': 3} {'type': 'loss', 'content': 0.03647204488515854, 'timestamp': '2025-09-10 03:00:01.268855', 'step': 19096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:01.322535', 'step': 19096, 'epoch': 3} {'type': 'loss', 'content': 0.08948606997728348, 'timestamp': '2025-09-10 03:00:01.326150', 'step': 19097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:01.379551', 'step': 19097, 'epoch': 3} {'type': 'loss', 'content': 0.11769147217273712, 'timestamp': '2025-09-10 03:00:01.381729', 'step': 19098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:01.437430', 'step': 19098, 'epoch': 3} {'type': 'loss', 'content': 0.13638760149478912, 'timestamp': '2025-09-10 03:00:01.439610', 'step': 19099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:01.510670', 'step': 19099, 'epoch': 3} {'type': 'loss', 'content': 0.06691578775644302, 'timestamp': '2025-09-10 03:00:01.516852', 'step': 19100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:01.581026', 'step': 19100, 'epoch': 3} {'type': 'loss', 'content': 0.035491567105054855, 'timestamp': '2025-09-10 03:00:01.583256', 'step': 19101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:01.637906', 'step': 19101, 'epoch': 3} {'type': 'loss', 'content': 0.07695125788450241, 'timestamp': '2025-09-10 03:00:01.640085', 'step': 19102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:01.696899', 'step': 19102, 'epoch': 3} {'type': 'loss', 'content': 0.10891571640968323, 'timestamp': '2025-09-10 03:00:01.699223', 'step': 19103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:01.758176', 'step': 19103, 'epoch': 3} {'type': 'loss', 'content': 0.05064713582396507, 'timestamp': '2025-09-10 03:00:01.768324', 'step': 19104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:01.829570', 'step': 19104, 'epoch': 3} {'type': 'loss', 'content': 0.08091052621603012, 'timestamp': '2025-09-10 03:00:01.831790', 'step': 19105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:01.887636', 'step': 19105, 'epoch': 3} {'type': 'loss', 'content': 0.032832514494657516, 'timestamp': '2025-09-10 03:00:01.889919', 'step': 19106, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 03:00:18.144091', 'step': 19106, 'epoch': 3} {'type': 'pplx', 'content': 11862.585738150388, 'timestamp': '2025-09-10 03:00:18.146893', 'step': 19106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:18.200805', 'step': 19106, 'epoch': 3} {'type': 'loss', 'content': 0.04620254039764404, 'timestamp': '2025-09-10 03:00:18.203207', 'step': 19107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:18.257317', 'step': 19107, 'epoch': 3} {'type': 'loss', 'content': 0.049394674599170685, 'timestamp': '2025-09-10 03:00:18.263424', 'step': 19108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:18.316911', 'step': 19108, 'epoch': 3} {'type': 'loss', 'content': 0.024348707869648933, 'timestamp': '2025-09-10 03:00:18.319071', 'step': 19109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:18.372670', 'step': 19109, 'epoch': 3} {'type': 'loss', 'content': 0.0936460942029953, 'timestamp': '2025-09-10 03:00:18.374772', 'step': 19110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:18.430507', 'step': 19110, 'epoch': 3} {'type': 'loss', 'content': 0.07952423393726349, 'timestamp': '2025-09-10 03:00:18.432695', 'step': 19111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:18.486769', 'step': 19111, 'epoch': 3} {'type': 'loss', 'content': 0.10172673314809799, 'timestamp': '2025-09-10 03:00:18.492370', 'step': 19112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:18.545856', 'step': 19112, 'epoch': 3} {'type': 'loss', 'content': 0.07010053843259811, 'timestamp': '2025-09-10 03:00:18.547711', 'step': 19113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:00:18.603124', 'step': 19113, 'epoch': 3} {'type': 'loss', 'content': 0.12968821823596954, 'timestamp': '2025-09-10 03:00:18.605004', 'step': 19114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:18.659362', 'step': 19114, 'epoch': 3} {'type': 'loss', 'content': 0.02533075213432312, 'timestamp': '2025-09-10 03:00:18.661199', 'step': 19115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:18.714540', 'step': 19115, 'epoch': 3} {'type': 'loss', 'content': 0.06273861229419708, 'timestamp': '2025-09-10 03:00:18.720465', 'step': 19116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:18.774498', 'step': 19116, 'epoch': 3} {'type': 'loss', 'content': 0.03968166932463646, 'timestamp': '2025-09-10 03:00:18.776555', 'step': 19117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:18.830179', 'step': 19117, 'epoch': 3} {'type': 'loss', 'content': 0.08352106064558029, 'timestamp': '2025-09-10 03:00:18.832393', 'step': 19118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:18.886897', 'step': 19118, 'epoch': 3} {'type': 'loss', 'content': 0.07991296797990799, 'timestamp': '2025-09-10 03:00:18.889021', 'step': 19119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:18.943092', 'step': 19119, 'epoch': 3} {'type': 'loss', 'content': 0.06404181569814682, 'timestamp': '2025-09-10 03:00:18.949670', 'step': 19120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:19.003890', 'step': 19120, 'epoch': 3} {'type': 'loss', 'content': 0.06679718941450119, 'timestamp': '2025-09-10 03:00:19.005668', 'step': 19121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:19.059435', 'step': 19121, 'epoch': 3} {'type': 'loss', 'content': 0.1882578432559967, 'timestamp': '2025-09-10 03:00:19.061468', 'step': 19122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:19.114984', 'step': 19122, 'epoch': 3} {'type': 'loss', 'content': 0.06774185597896576, 'timestamp': '2025-09-10 03:00:19.116980', 'step': 19123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:19.171655', 'step': 19123, 'epoch': 3} {'type': 'loss', 'content': 0.19806444644927979, 'timestamp': '2025-09-10 03:00:19.177674', 'step': 19124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:19.231526', 'step': 19124, 'epoch': 3} {'type': 'loss', 'content': 0.05803689733147621, 'timestamp': '2025-09-10 03:00:19.233627', 'step': 19125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:19.286969', 'step': 19125, 'epoch': 3} {'type': 'loss', 'content': 0.08484189957380295, 'timestamp': '2025-09-10 03:00:19.289187', 'step': 19126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:19.343583', 'step': 19126, 'epoch': 3} {'type': 'loss', 'content': 0.10408426076173782, 'timestamp': '2025-09-10 03:00:19.345883', 'step': 19127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:19.399420', 'step': 19127, 'epoch': 3} {'type': 'loss', 'content': 0.063779316842556, 'timestamp': '2025-09-10 03:00:19.405067', 'step': 19128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:19.458951', 'step': 19128, 'epoch': 3} {'type': 'loss', 'content': 0.11476399004459381, 'timestamp': '2025-09-10 03:00:19.460716', 'step': 19129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:19.513858', 'step': 19129, 'epoch': 3} {'type': 'loss', 'content': 0.10485708713531494, 'timestamp': '2025-09-10 03:00:19.515794', 'step': 19130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:19.569857', 'step': 19130, 'epoch': 3} {'type': 'loss', 'content': 0.030715910717844963, 'timestamp': '2025-09-10 03:00:19.572060', 'step': 19131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:19.626021', 'step': 19131, 'epoch': 3} {'type': 'loss', 'content': 0.22520722448825836, 'timestamp': '2025-09-10 03:00:19.631878', 'step': 19132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:19.686815', 'step': 19132, 'epoch': 3} {'type': 'loss', 'content': 0.0352875292301178, 'timestamp': '2025-09-10 03:00:19.689006', 'step': 19133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:19.743742', 'step': 19133, 'epoch': 3} {'type': 'loss', 'content': 0.07393044233322144, 'timestamp': '2025-09-10 03:00:19.746440', 'step': 19134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:19.799737', 'step': 19134, 'epoch': 3} {'type': 'loss', 'content': 0.09265223145484924, 'timestamp': '2025-09-10 03:00:19.801901', 'step': 19135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:19.855585', 'step': 19135, 'epoch': 3} {'type': 'loss', 'content': 0.11352003365755081, 'timestamp': '2025-09-10 03:00:19.861724', 'step': 19136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:19.915436', 'step': 19136, 'epoch': 3} {'type': 'loss', 'content': 0.10320252925157547, 'timestamp': '2025-09-10 03:00:19.917402', 'step': 19137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:19.970446', 'step': 19137, 'epoch': 3} {'type': 'loss', 'content': 0.15133674442768097, 'timestamp': '2025-09-10 03:00:19.972419', 'step': 19138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:20.025977', 'step': 19138, 'epoch': 3} {'type': 'loss', 'content': 0.08904854953289032, 'timestamp': '2025-09-10 03:00:20.027681', 'step': 19139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:00:20.083062', 'step': 19139, 'epoch': 3} {'type': 'loss', 'content': 0.07821245491504669, 'timestamp': '2025-09-10 03:00:20.088674', 'step': 19140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:20.142302', 'step': 19140, 'epoch': 3} {'type': 'loss', 'content': 0.060049839317798615, 'timestamp': '2025-09-10 03:00:20.144673', 'step': 19141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:20.199406', 'step': 19141, 'epoch': 3} {'type': 'loss', 'content': 0.04525361210107803, 'timestamp': '2025-09-10 03:00:20.201327', 'step': 19142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:20.255555', 'step': 19142, 'epoch': 3} {'type': 'loss', 'content': 0.12857964634895325, 'timestamp': '2025-09-10 03:00:20.257700', 'step': 19143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:20.311203', 'step': 19143, 'epoch': 3} {'type': 'loss', 'content': 0.10054503381252289, 'timestamp': '2025-09-10 03:00:20.317151', 'step': 19144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:20.370655', 'step': 19144, 'epoch': 3} {'type': 'loss', 'content': 0.0805630087852478, 'timestamp': '2025-09-10 03:00:20.372753', 'step': 19145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:20.426901', 'step': 19145, 'epoch': 3} {'type': 'loss', 'content': 0.0900070071220398, 'timestamp': '2025-09-10 03:00:20.428586', 'step': 19146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:20.482646', 'step': 19146, 'epoch': 3} {'type': 'loss', 'content': 0.1528993546962738, 'timestamp': '2025-09-10 03:00:20.484354', 'step': 19147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:20.539379', 'step': 19147, 'epoch': 3} {'type': 'loss', 'content': 0.0953422412276268, 'timestamp': '2025-09-10 03:00:20.544871', 'step': 19148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:20.597932', 'step': 19148, 'epoch': 3} {'type': 'loss', 'content': 0.05583452433347702, 'timestamp': '2025-09-10 03:00:20.600005', 'step': 19149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:20.653663', 'step': 19149, 'epoch': 3} {'type': 'loss', 'content': 0.09844287484884262, 'timestamp': '2025-09-10 03:00:20.655802', 'step': 19150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:20.709048', 'step': 19150, 'epoch': 3} {'type': 'loss', 'content': 0.032474324107170105, 'timestamp': '2025-09-10 03:00:20.711077', 'step': 19151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:20.765444', 'step': 19151, 'epoch': 3} {'type': 'loss', 'content': 0.10698634386062622, 'timestamp': '2025-09-10 03:00:20.771540', 'step': 19152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:20.826407', 'step': 19152, 'epoch': 3} {'type': 'loss', 'content': 0.0865183025598526, 'timestamp': '2025-09-10 03:00:20.828526', 'step': 19153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:20.882237', 'step': 19153, 'epoch': 3} {'type': 'loss', 'content': 0.12500767409801483, 'timestamp': '2025-09-10 03:00:20.884394', 'step': 19154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:20.938100', 'step': 19154, 'epoch': 3} {'type': 'loss', 'content': 0.15902762115001678, 'timestamp': '2025-09-10 03:00:20.940047', 'step': 19155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:20.993409', 'step': 19155, 'epoch': 3} {'type': 'loss', 'content': 0.15243442356586456, 'timestamp': '2025-09-10 03:00:20.999013', 'step': 19156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:21.052673', 'step': 19156, 'epoch': 3} {'type': 'loss', 'content': 0.08770790696144104, 'timestamp': '2025-09-10 03:00:21.055995', 'step': 19157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:21.111072', 'step': 19157, 'epoch': 3} {'type': 'loss', 'content': 0.07144211232662201, 'timestamp': '2025-09-10 03:00:21.113718', 'step': 19158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:21.166948', 'step': 19158, 'epoch': 3} {'type': 'loss', 'content': 0.06572409719228745, 'timestamp': '2025-09-10 03:00:21.169152', 'step': 19159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:21.221923', 'step': 19159, 'epoch': 3} {'type': 'loss', 'content': 0.018850168213248253, 'timestamp': '2025-09-10 03:00:21.227656', 'step': 19160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:21.280000', 'step': 19160, 'epoch': 3} {'type': 'loss', 'content': 0.027178863063454628, 'timestamp': '2025-09-10 03:00:21.282186', 'step': 19161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:21.337771', 'step': 19161, 'epoch': 3} {'type': 'loss', 'content': 0.03965296596288681, 'timestamp': '2025-09-10 03:00:21.339904', 'step': 19162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:21.398158', 'step': 19162, 'epoch': 3} {'type': 'loss', 'content': 0.16191816329956055, 'timestamp': '2025-09-10 03:00:21.400049', 'step': 19163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:21.453446', 'step': 19163, 'epoch': 3} {'type': 'loss', 'content': 0.1341637820005417, 'timestamp': '2025-09-10 03:00:21.459106', 'step': 19164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:21.512000', 'step': 19164, 'epoch': 3} {'type': 'loss', 'content': 0.05136740580201149, 'timestamp': '2025-09-10 03:00:21.514027', 'step': 19165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:21.567182', 'step': 19165, 'epoch': 3} {'type': 'loss', 'content': 0.10296989977359772, 'timestamp': '2025-09-10 03:00:21.569302', 'step': 19166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:21.622484', 'step': 19166, 'epoch': 3} {'type': 'loss', 'content': 0.15523706376552582, 'timestamp': '2025-09-10 03:00:21.624606', 'step': 19167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:21.677902', 'step': 19167, 'epoch': 3} {'type': 'loss', 'content': 0.08522646129131317, 'timestamp': '2025-09-10 03:00:21.683777', 'step': 19168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:21.739516', 'step': 19168, 'epoch': 3} {'type': 'loss', 'content': 0.13062168657779694, 'timestamp': '2025-09-10 03:00:21.741747', 'step': 19169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:21.795199', 'step': 19169, 'epoch': 3} {'type': 'loss', 'content': 0.14854510128498077, 'timestamp': '2025-09-10 03:00:21.797213', 'step': 19170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:21.851237', 'step': 19170, 'epoch': 3} {'type': 'loss', 'content': 0.0994265004992485, 'timestamp': '2025-09-10 03:00:21.852921', 'step': 19171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:21.907351', 'step': 19171, 'epoch': 3} {'type': 'loss', 'content': 0.07089819759130478, 'timestamp': '2025-09-10 03:00:21.913133', 'step': 19172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:21.965727', 'step': 19172, 'epoch': 3} {'type': 'loss', 'content': 0.0868125781416893, 'timestamp': '2025-09-10 03:00:21.967751', 'step': 19173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:22.021314', 'step': 19173, 'epoch': 3} {'type': 'loss', 'content': 0.11666382104158401, 'timestamp': '2025-09-10 03:00:22.023497', 'step': 19174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:22.077054', 'step': 19174, 'epoch': 3} {'type': 'loss', 'content': 0.11886709928512573, 'timestamp': '2025-09-10 03:00:22.079087', 'step': 19175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:00:22.133072', 'step': 19175, 'epoch': 3} {'type': 'loss', 'content': 0.05677911639213562, 'timestamp': '2025-09-10 03:00:22.138903', 'step': 19176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:00:22.191629', 'step': 19176, 'epoch': 3} {'type': 'loss', 'content': 0.14068248867988586, 'timestamp': '2025-09-10 03:00:22.193884', 'step': 19177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:22.252064', 'step': 19177, 'epoch': 3} {'type': 'loss', 'content': 0.17820283770561218, 'timestamp': '2025-09-10 03:00:22.254195', 'step': 19178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:22.306849', 'step': 19178, 'epoch': 3} {'type': 'loss', 'content': 0.06527770310640335, 'timestamp': '2025-09-10 03:00:22.308972', 'step': 19179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:22.362685', 'step': 19179, 'epoch': 3} {'type': 'loss', 'content': 0.08362889289855957, 'timestamp': '2025-09-10 03:00:22.368647', 'step': 19180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:22.422624', 'step': 19180, 'epoch': 3} {'type': 'loss', 'content': 0.11703512072563171, 'timestamp': '2025-09-10 03:00:22.424749', 'step': 19181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:22.478396', 'step': 19181, 'epoch': 3} {'type': 'loss', 'content': 0.07816343009471893, 'timestamp': '2025-09-10 03:00:22.480584', 'step': 19182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:22.534149', 'step': 19182, 'epoch': 3} {'type': 'loss', 'content': 0.09681755304336548, 'timestamp': '2025-09-10 03:00:22.536422', 'step': 19183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:22.592192', 'step': 19183, 'epoch': 3} {'type': 'loss', 'content': 0.034095246344804764, 'timestamp': '2025-09-10 03:00:22.598187', 'step': 19184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:22.654420', 'step': 19184, 'epoch': 3} {'type': 'loss', 'content': 0.07739733159542084, 'timestamp': '2025-09-10 03:00:22.656802', 'step': 19185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:22.709631', 'step': 19185, 'epoch': 3} {'type': 'loss', 'content': 0.13047610223293304, 'timestamp': '2025-09-10 03:00:22.711752', 'step': 19186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:00:22.765076', 'step': 19186, 'epoch': 3} {'type': 'loss', 'content': 0.06365916877985, 'timestamp': '2025-09-10 03:00:22.767286', 'step': 19187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:22.820953', 'step': 19187, 'epoch': 3} {'type': 'loss', 'content': 0.12239542603492737, 'timestamp': '2025-09-10 03:00:22.826769', 'step': 19188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:22.880108', 'step': 19188, 'epoch': 3} {'type': 'loss', 'content': 0.13617034256458282, 'timestamp': '2025-09-10 03:00:22.882206', 'step': 19189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:22.936319', 'step': 19189, 'epoch': 3} {'type': 'loss', 'content': 0.05414648726582527, 'timestamp': '2025-09-10 03:00:22.938413', 'step': 19190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:22.992517', 'step': 19190, 'epoch': 3} {'type': 'loss', 'content': 0.05779412388801575, 'timestamp': '2025-09-10 03:00:22.994612', 'step': 19191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:23.047789', 'step': 19191, 'epoch': 3} {'type': 'loss', 'content': 0.11823518574237823, 'timestamp': '2025-09-10 03:00:23.053623', 'step': 19192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:23.106594', 'step': 19192, 'epoch': 3} {'type': 'loss', 'content': 0.1261172592639923, 'timestamp': '2025-09-10 03:00:23.108533', 'step': 19193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:23.163480', 'step': 19193, 'epoch': 3} {'type': 'loss', 'content': 0.13107085227966309, 'timestamp': '2025-09-10 03:00:23.165523', 'step': 19194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:23.219163', 'step': 19194, 'epoch': 3} {'type': 'loss', 'content': 0.07324832677841187, 'timestamp': '2025-09-10 03:00:23.221488', 'step': 19195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:23.274615', 'step': 19195, 'epoch': 3} {'type': 'loss', 'content': 0.08216211944818497, 'timestamp': '2025-09-10 03:00:23.280466', 'step': 19196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:23.333511', 'step': 19196, 'epoch': 3} {'type': 'loss', 'content': 0.07138360291719437, 'timestamp': '2025-09-10 03:00:23.335628', 'step': 19197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:23.388753', 'step': 19197, 'epoch': 3} {'type': 'loss', 'content': 0.03459035977721214, 'timestamp': '2025-09-10 03:00:23.390981', 'step': 19198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:23.445340', 'step': 19198, 'epoch': 3} {'type': 'loss', 'content': 0.06590800732374191, 'timestamp': '2025-09-10 03:00:23.447647', 'step': 19199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:23.503176', 'step': 19199, 'epoch': 3} {'type': 'loss', 'content': 0.17210879921913147, 'timestamp': '2025-09-10 03:00:23.509119', 'step': 19200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:23.563302', 'step': 19200, 'epoch': 3} {'type': 'loss', 'content': 0.08022327721118927, 'timestamp': '2025-09-10 03:00:23.565467', 'step': 19201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:23.618947', 'step': 19201, 'epoch': 3} {'type': 'loss', 'content': 0.03682069107890129, 'timestamp': '2025-09-10 03:00:23.621097', 'step': 19202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:23.674358', 'step': 19202, 'epoch': 3} {'type': 'loss', 'content': 0.07899273931980133, 'timestamp': '2025-09-10 03:00:23.676571', 'step': 19203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:23.730029', 'step': 19203, 'epoch': 3} {'type': 'loss', 'content': 0.10496711730957031, 'timestamp': '2025-09-10 03:00:23.736021', 'step': 19204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:23.788876', 'step': 19204, 'epoch': 3} {'type': 'loss', 'content': 0.0822385922074318, 'timestamp': '2025-09-10 03:00:23.791025', 'step': 19205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:23.844598', 'step': 19205, 'epoch': 3} {'type': 'loss', 'content': 0.11742259562015533, 'timestamp': '2025-09-10 03:00:23.846818', 'step': 19206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:00:23.900471', 'step': 19206, 'epoch': 3} {'type': 'loss', 'content': 0.12828263640403748, 'timestamp': '2025-09-10 03:00:23.902569', 'step': 19207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:23.956312', 'step': 19207, 'epoch': 3} {'type': 'loss', 'content': 0.08592712879180908, 'timestamp': '2025-09-10 03:00:23.962306', 'step': 19208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:24.016158', 'step': 19208, 'epoch': 3} {'type': 'loss', 'content': 0.09373922646045685, 'timestamp': '2025-09-10 03:00:24.018355', 'step': 19209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:24.072065', 'step': 19209, 'epoch': 3} {'type': 'loss', 'content': 0.20152747631072998, 'timestamp': '2025-09-10 03:00:24.074209', 'step': 19210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:24.128059', 'step': 19210, 'epoch': 3} {'type': 'loss', 'content': 0.06942590326070786, 'timestamp': '2025-09-10 03:00:24.130220', 'step': 19211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:24.183772', 'step': 19211, 'epoch': 3} {'type': 'loss', 'content': 0.05968658626079559, 'timestamp': '2025-09-10 03:00:24.189718', 'step': 19212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:24.242545', 'step': 19212, 'epoch': 3} {'type': 'loss', 'content': 0.1912440061569214, 'timestamp': '2025-09-10 03:00:24.244804', 'step': 19213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:24.297651', 'step': 19213, 'epoch': 3} {'type': 'loss', 'content': 0.08976858854293823, 'timestamp': '2025-09-10 03:00:24.299651', 'step': 19214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:00:24.352916', 'step': 19214, 'epoch': 3} {'type': 'loss', 'content': 0.07812745124101639, 'timestamp': '2025-09-10 03:00:24.354897', 'step': 19215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:24.407437', 'step': 19215, 'epoch': 3} {'type': 'loss', 'content': 0.09028229117393494, 'timestamp': '2025-09-10 03:00:24.413204', 'step': 19216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:24.466113', 'step': 19216, 'epoch': 3} {'type': 'loss', 'content': 0.10924849659204483, 'timestamp': '2025-09-10 03:00:24.468313', 'step': 19217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:24.522392', 'step': 19217, 'epoch': 3} {'type': 'loss', 'content': 0.06404188275337219, 'timestamp': '2025-09-10 03:00:24.524477', 'step': 19218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:24.578494', 'step': 19218, 'epoch': 3} {'type': 'loss', 'content': 0.17817792296409607, 'timestamp': '2025-09-10 03:00:24.580667', 'step': 19219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:24.634289', 'step': 19219, 'epoch': 3} {'type': 'loss', 'content': 0.13331109285354614, 'timestamp': '2025-09-10 03:00:24.640278', 'step': 19220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:00:24.693410', 'step': 19220, 'epoch': 3} {'type': 'loss', 'content': 0.14193841814994812, 'timestamp': '2025-09-10 03:00:24.695561', 'step': 19221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:00:24.748828', 'step': 19221, 'epoch': 3} {'type': 'loss', 'content': 0.07551394402980804, 'timestamp': '2025-09-10 03:00:24.750821', 'step': 19222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:24.805225', 'step': 19222, 'epoch': 3} {'type': 'loss', 'content': 0.11132672429084778, 'timestamp': '2025-09-10 03:00:24.807226', 'step': 19223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:24.861834', 'step': 19223, 'epoch': 3} {'type': 'loss', 'content': 0.08341951668262482, 'timestamp': '2025-09-10 03:00:24.867559', 'step': 19224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:24.919976', 'step': 19224, 'epoch': 3} {'type': 'loss', 'content': 0.09737518429756165, 'timestamp': '2025-09-10 03:00:24.922093', 'step': 19225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:24.975457', 'step': 19225, 'epoch': 3} {'type': 'loss', 'content': 0.03233195096254349, 'timestamp': '2025-09-10 03:00:24.977813', 'step': 19226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:00:25.032256', 'step': 19226, 'epoch': 3} {'type': 'loss', 'content': 0.06141210347414017, 'timestamp': '2025-09-10 03:00:25.034654', 'step': 19227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:00:25.088843', 'step': 19227, 'epoch': 3} {'type': 'loss', 'content': 0.11211423575878143, 'timestamp': '2025-09-10 03:00:25.094918', 'step': 19228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:25.148502', 'step': 19228, 'epoch': 3} {'type': 'loss', 'content': 0.1395687311887741, 'timestamp': '2025-09-10 03:00:25.150604', 'step': 19229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:25.205187', 'step': 19229, 'epoch': 3} {'type': 'loss', 'content': 0.12224729359149933, 'timestamp': '2025-09-10 03:00:25.207111', 'step': 19230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:25.260451', 'step': 19230, 'epoch': 3} {'type': 'loss', 'content': 0.014895775355398655, 'timestamp': '2025-09-10 03:00:25.262353', 'step': 19231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:25.315443', 'step': 19231, 'epoch': 3} {'type': 'loss', 'content': 0.12170165032148361, 'timestamp': '2025-09-10 03:00:25.321312', 'step': 19232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:25.374089', 'step': 19232, 'epoch': 3} {'type': 'loss', 'content': 0.06739158928394318, 'timestamp': '2025-09-10 03:00:25.376069', 'step': 19233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:25.429152', 'step': 19233, 'epoch': 3} {'type': 'loss', 'content': 0.12585537135601044, 'timestamp': '2025-09-10 03:00:25.431317', 'step': 19234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:25.485029', 'step': 19234, 'epoch': 3} {'type': 'loss', 'content': 0.11916500329971313, 'timestamp': '2025-09-10 03:00:25.487098', 'step': 19235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:25.540634', 'step': 19235, 'epoch': 3} {'type': 'loss', 'content': 0.07952941954135895, 'timestamp': '2025-09-10 03:00:25.546431', 'step': 19236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:25.600936', 'step': 19236, 'epoch': 3} {'type': 'loss', 'content': 0.1895231008529663, 'timestamp': '2025-09-10 03:00:25.603023', 'step': 19237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:00:25.656488', 'step': 19237, 'epoch': 3} {'type': 'loss', 'content': 0.04716615378856659, 'timestamp': '2025-09-10 03:00:25.659056', 'step': 19238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:25.712421', 'step': 19238, 'epoch': 3} {'type': 'loss', 'content': 0.0867295116186142, 'timestamp': '2025-09-10 03:00:25.714558', 'step': 19239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:25.767838', 'step': 19239, 'epoch': 3} {'type': 'loss', 'content': 0.1232839897274971, 'timestamp': '2025-09-10 03:00:25.773754', 'step': 19240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:25.826659', 'step': 19240, 'epoch': 3} {'type': 'loss', 'content': 0.06283749639987946, 'timestamp': '2025-09-10 03:00:25.828897', 'step': 19241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:25.882125', 'step': 19241, 'epoch': 3} {'type': 'loss', 'content': 0.10196679830551147, 'timestamp': '2025-09-10 03:00:25.884455', 'step': 19242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:25.938368', 'step': 19242, 'epoch': 3} {'type': 'loss', 'content': 0.14135444164276123, 'timestamp': '2025-09-10 03:00:25.940453', 'step': 19243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:00:25.995086', 'step': 19243, 'epoch': 3} {'type': 'loss', 'content': 0.12171214818954468, 'timestamp': '2025-09-10 03:00:26.000769', 'step': 19244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:26.053670', 'step': 19244, 'epoch': 3} {'type': 'loss', 'content': 0.06280004978179932, 'timestamp': '2025-09-10 03:00:26.055673', 'step': 19245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:26.109288', 'step': 19245, 'epoch': 3} {'type': 'loss', 'content': 0.10723257064819336, 'timestamp': '2025-09-10 03:00:26.111394', 'step': 19246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:26.165354', 'step': 19246, 'epoch': 3} {'type': 'loss', 'content': 0.10316526144742966, 'timestamp': '2025-09-10 03:00:26.167550', 'step': 19247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:26.221647', 'step': 19247, 'epoch': 3} {'type': 'loss', 'content': 0.06294918060302734, 'timestamp': '2025-09-10 03:00:26.227574', 'step': 19248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:26.280424', 'step': 19248, 'epoch': 3} {'type': 'loss', 'content': 0.11001478880643845, 'timestamp': '2025-09-10 03:00:26.282690', 'step': 19249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:26.335716', 'step': 19249, 'epoch': 3} {'type': 'loss', 'content': 0.16335025429725647, 'timestamp': '2025-09-10 03:00:26.337861', 'step': 19250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:26.391595', 'step': 19250, 'epoch': 3} {'type': 'loss', 'content': 0.1333474963903427, 'timestamp': '2025-09-10 03:00:26.393682', 'step': 19251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:26.446990', 'step': 19251, 'epoch': 3} {'type': 'loss', 'content': 0.06527857482433319, 'timestamp': '2025-09-10 03:00:26.452788', 'step': 19252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:26.505646', 'step': 19252, 'epoch': 3} {'type': 'loss', 'content': 0.08098218590021133, 'timestamp': '2025-09-10 03:00:26.507731', 'step': 19253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:26.567221', 'step': 19253, 'epoch': 3} {'type': 'loss', 'content': 0.05426168069243431, 'timestamp': '2025-09-10 03:00:26.569355', 'step': 19254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:26.623116', 'step': 19254, 'epoch': 3} {'type': 'loss', 'content': 0.08683169633150101, 'timestamp': '2025-09-10 03:00:26.625429', 'step': 19255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:26.679839', 'step': 19255, 'epoch': 3} {'type': 'loss', 'content': 0.12584717571735382, 'timestamp': '2025-09-10 03:00:26.685923', 'step': 19256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:26.738846', 'step': 19256, 'epoch': 3} {'type': 'loss', 'content': 0.07668981701135635, 'timestamp': '2025-09-10 03:00:26.740932', 'step': 19257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:26.794436', 'step': 19257, 'epoch': 3} {'type': 'loss', 'content': 0.0753001719713211, 'timestamp': '2025-09-10 03:00:26.796640', 'step': 19258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:26.850085', 'step': 19258, 'epoch': 3} {'type': 'loss', 'content': 0.12309475243091583, 'timestamp': '2025-09-10 03:00:26.852222', 'step': 19259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:26.905638', 'step': 19259, 'epoch': 3} {'type': 'loss', 'content': 0.0921986922621727, 'timestamp': '2025-09-10 03:00:26.911432', 'step': 19260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:26.964599', 'step': 19260, 'epoch': 3} {'type': 'loss', 'content': 0.08645299077033997, 'timestamp': '2025-09-10 03:00:26.966797', 'step': 19261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:27.019811', 'step': 19261, 'epoch': 3} {'type': 'loss', 'content': 0.07863800972700119, 'timestamp': '2025-09-10 03:00:27.022183', 'step': 19262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:27.076232', 'step': 19262, 'epoch': 3} {'type': 'loss', 'content': 0.07417674362659454, 'timestamp': '2025-09-10 03:00:27.078431', 'step': 19263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:27.132397', 'step': 19263, 'epoch': 3} {'type': 'loss', 'content': 0.13429611921310425, 'timestamp': '2025-09-10 03:00:27.138216', 'step': 19264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:27.192707', 'step': 19264, 'epoch': 3} {'type': 'loss', 'content': 0.06434503942728043, 'timestamp': '2025-09-10 03:00:27.194819', 'step': 19265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:27.248713', 'step': 19265, 'epoch': 3} {'type': 'loss', 'content': 0.13338550925254822, 'timestamp': '2025-09-10 03:00:27.250865', 'step': 19266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:27.304335', 'step': 19266, 'epoch': 3} {'type': 'loss', 'content': 0.09971009939908981, 'timestamp': '2025-09-10 03:00:27.306512', 'step': 19267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:27.359987', 'step': 19267, 'epoch': 3} {'type': 'loss', 'content': 0.1277228593826294, 'timestamp': '2025-09-10 03:00:27.366074', 'step': 19268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:27.419001', 'step': 19268, 'epoch': 3} {'type': 'loss', 'content': 0.08296411484479904, 'timestamp': '2025-09-10 03:00:27.421325', 'step': 19269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:27.475417', 'step': 19269, 'epoch': 3} {'type': 'loss', 'content': 0.12303053587675095, 'timestamp': '2025-09-10 03:00:27.477765', 'step': 19270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:27.533419', 'step': 19270, 'epoch': 3} {'type': 'loss', 'content': 0.07511766999959946, 'timestamp': '2025-09-10 03:00:27.535818', 'step': 19271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:27.589577', 'step': 19271, 'epoch': 3} {'type': 'loss', 'content': 0.12922942638397217, 'timestamp': '2025-09-10 03:00:27.595606', 'step': 19272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:27.648769', 'step': 19272, 'epoch': 3} {'type': 'loss', 'content': 0.09087201207876205, 'timestamp': '2025-09-10 03:00:27.650876', 'step': 19273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:27.704880', 'step': 19273, 'epoch': 3} {'type': 'loss', 'content': 0.08136123418807983, 'timestamp': '2025-09-10 03:00:27.706990', 'step': 19274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:27.761476', 'step': 19274, 'epoch': 3} {'type': 'loss', 'content': 0.12628315389156342, 'timestamp': '2025-09-10 03:00:27.763682', 'step': 19275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:27.818111', 'step': 19275, 'epoch': 3} {'type': 'loss', 'content': 0.03916752338409424, 'timestamp': '2025-09-10 03:00:27.824111', 'step': 19276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:27.877423', 'step': 19276, 'epoch': 3} {'type': 'loss', 'content': 0.1569005846977234, 'timestamp': '2025-09-10 03:00:27.879569', 'step': 19277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:27.933284', 'step': 19277, 'epoch': 3} {'type': 'loss', 'content': 0.12147227674722672, 'timestamp': '2025-09-10 03:00:27.936507', 'step': 19278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:27.990057', 'step': 19278, 'epoch': 3} {'type': 'loss', 'content': 0.17581002414226532, 'timestamp': '2025-09-10 03:00:27.992191', 'step': 19279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:28.045612', 'step': 19279, 'epoch': 3} {'type': 'loss', 'content': 0.0328729972243309, 'timestamp': '2025-09-10 03:00:28.051497', 'step': 19280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:28.104407', 'step': 19280, 'epoch': 3} {'type': 'loss', 'content': 0.029690248891711235, 'timestamp': '2025-09-10 03:00:28.106508', 'step': 19281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:28.159510', 'step': 19281, 'epoch': 3} {'type': 'loss', 'content': 0.1195608600974083, 'timestamp': '2025-09-10 03:00:28.161583', 'step': 19282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:28.215305', 'step': 19282, 'epoch': 3} {'type': 'loss', 'content': 0.03588941693305969, 'timestamp': '2025-09-10 03:00:28.217408', 'step': 19283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:00:28.272522', 'step': 19283, 'epoch': 3} {'type': 'loss', 'content': 0.08962605893611908, 'timestamp': '2025-09-10 03:00:28.278774', 'step': 19284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:28.332931', 'step': 19284, 'epoch': 3} {'type': 'loss', 'content': 0.08944444358348846, 'timestamp': '2025-09-10 03:00:28.335326', 'step': 19285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:28.391336', 'step': 19285, 'epoch': 3} {'type': 'loss', 'content': 0.10992857068777084, 'timestamp': '2025-09-10 03:00:28.393487', 'step': 19286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:28.449061', 'step': 19286, 'epoch': 3} {'type': 'loss', 'content': 0.08537865430116653, 'timestamp': '2025-09-10 03:00:28.451292', 'step': 19287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:28.506567', 'step': 19287, 'epoch': 3} {'type': 'loss', 'content': 0.14334166049957275, 'timestamp': '2025-09-10 03:00:28.513060', 'step': 19288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:28.567071', 'step': 19288, 'epoch': 3} {'type': 'loss', 'content': 0.08299572765827179, 'timestamp': '2025-09-10 03:00:28.569451', 'step': 19289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:28.623418', 'step': 19289, 'epoch': 3} {'type': 'loss', 'content': 0.2133033275604248, 'timestamp': '2025-09-10 03:00:28.625918', 'step': 19290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:28.679676', 'step': 19290, 'epoch': 3} {'type': 'loss', 'content': 0.1054525151848793, 'timestamp': '2025-09-10 03:00:28.681798', 'step': 19291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:28.735342', 'step': 19291, 'epoch': 3} {'type': 'loss', 'content': 0.13171479105949402, 'timestamp': '2025-09-10 03:00:28.741195', 'step': 19292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:28.794747', 'step': 19292, 'epoch': 3} {'type': 'loss', 'content': 0.08586034178733826, 'timestamp': '2025-09-10 03:00:28.796933', 'step': 19293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:28.850385', 'step': 19293, 'epoch': 3} {'type': 'loss', 'content': 0.18540886044502258, 'timestamp': '2025-09-10 03:00:28.852523', 'step': 19294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:28.908110', 'step': 19294, 'epoch': 3} {'type': 'loss', 'content': 0.12690620124340057, 'timestamp': '2025-09-10 03:00:28.910322', 'step': 19295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:28.966663', 'step': 19295, 'epoch': 3} {'type': 'loss', 'content': 0.037790343165397644, 'timestamp': '2025-09-10 03:00:28.972720', 'step': 19296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:00:29.025874', 'step': 19296, 'epoch': 3} {'type': 'loss', 'content': 0.04944169521331787, 'timestamp': '2025-09-10 03:00:29.027897', 'step': 19297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:29.081801', 'step': 19297, 'epoch': 3} {'type': 'loss', 'content': 0.06346943229436874, 'timestamp': '2025-09-10 03:00:29.084124', 'step': 19298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:29.138323', 'step': 19298, 'epoch': 3} {'type': 'loss', 'content': 0.12282612919807434, 'timestamp': '2025-09-10 03:00:29.140591', 'step': 19299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:29.193989', 'step': 19299, 'epoch': 3} {'type': 'loss', 'content': 0.10309566557407379, 'timestamp': '2025-09-10 03:00:29.199805', 'step': 19300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:29.252701', 'step': 19300, 'epoch': 3} {'type': 'loss', 'content': 0.14804308116436005, 'timestamp': '2025-09-10 03:00:29.254974', 'step': 19301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:29.308511', 'step': 19301, 'epoch': 3} {'type': 'loss', 'content': 0.1487690657377243, 'timestamp': '2025-09-10 03:00:29.310580', 'step': 19302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:29.364440', 'step': 19302, 'epoch': 3} {'type': 'loss', 'content': 0.06147855520248413, 'timestamp': '2025-09-10 03:00:29.366693', 'step': 19303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:29.420767', 'step': 19303, 'epoch': 3} {'type': 'loss', 'content': 0.04205791652202606, 'timestamp': '2025-09-10 03:00:29.426519', 'step': 19304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:29.479734', 'step': 19304, 'epoch': 3} {'type': 'loss', 'content': 0.12222874164581299, 'timestamp': '2025-09-10 03:00:29.481857', 'step': 19305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:29.537649', 'step': 19305, 'epoch': 3} {'type': 'loss', 'content': 0.1121811717748642, 'timestamp': '2025-09-10 03:00:29.539772', 'step': 19306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:29.593737', 'step': 19306, 'epoch': 3} {'type': 'loss', 'content': 0.11695697158575058, 'timestamp': '2025-09-10 03:00:29.595873', 'step': 19307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:29.654354', 'step': 19307, 'epoch': 3} {'type': 'loss', 'content': 0.041938330978155136, 'timestamp': '2025-09-10 03:00:29.660340', 'step': 19308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:29.719140', 'step': 19308, 'epoch': 3} {'type': 'loss', 'content': 0.11611301451921463, 'timestamp': '2025-09-10 03:00:29.721334', 'step': 19309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:29.774294', 'step': 19309, 'epoch': 3} {'type': 'loss', 'content': 0.07806016504764557, 'timestamp': '2025-09-10 03:00:29.776402', 'step': 19310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:29.829401', 'step': 19310, 'epoch': 3} {'type': 'loss', 'content': 0.0997445359826088, 'timestamp': '2025-09-10 03:00:29.831526', 'step': 19311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:29.884396', 'step': 19311, 'epoch': 3} {'type': 'loss', 'content': 0.11410897225141525, 'timestamp': '2025-09-10 03:00:29.890428', 'step': 19312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:29.944804', 'step': 19312, 'epoch': 3} {'type': 'loss', 'content': 0.02462882548570633, 'timestamp': '2025-09-10 03:00:29.947026', 'step': 19313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:30.000966', 'step': 19313, 'epoch': 3} {'type': 'loss', 'content': 0.10848693549633026, 'timestamp': '2025-09-10 03:00:30.003196', 'step': 19314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:30.065230', 'step': 19314, 'epoch': 3} {'type': 'loss', 'content': 0.1092022955417633, 'timestamp': '2025-09-10 03:00:30.067374', 'step': 19315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:30.122447', 'step': 19315, 'epoch': 3} {'type': 'loss', 'content': 0.15843448042869568, 'timestamp': '2025-09-10 03:00:30.128524', 'step': 19316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:30.183242', 'step': 19316, 'epoch': 3} {'type': 'loss', 'content': 0.07101057469844818, 'timestamp': '2025-09-10 03:00:30.185332', 'step': 19317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:30.238984', 'step': 19317, 'epoch': 3} {'type': 'loss', 'content': 0.10129246115684509, 'timestamp': '2025-09-10 03:00:30.241125', 'step': 19318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:30.294822', 'step': 19318, 'epoch': 3} {'type': 'loss', 'content': 0.07304983586072922, 'timestamp': '2025-09-10 03:00:30.296968', 'step': 19319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:30.351259', 'step': 19319, 'epoch': 3} {'type': 'loss', 'content': 0.1401260942220688, 'timestamp': '2025-09-10 03:00:30.358576', 'step': 19320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:30.418601', 'step': 19320, 'epoch': 3} {'type': 'loss', 'content': 0.09220048040151596, 'timestamp': '2025-09-10 03:00:30.420760', 'step': 19321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:30.476836', 'step': 19321, 'epoch': 3} {'type': 'loss', 'content': 0.19884797930717468, 'timestamp': '2025-09-10 03:00:30.478984', 'step': 19322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:00:30.532521', 'step': 19322, 'epoch': 3} {'type': 'loss', 'content': 0.08942124992609024, 'timestamp': '2025-09-10 03:00:30.534468', 'step': 19323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:30.589341', 'step': 19323, 'epoch': 3} {'type': 'loss', 'content': 0.0982823446393013, 'timestamp': '2025-09-10 03:00:30.595553', 'step': 19324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:30.652134', 'step': 19324, 'epoch': 3} {'type': 'loss', 'content': 0.05108008161187172, 'timestamp': '2025-09-10 03:00:30.654316', 'step': 19325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:30.707530', 'step': 19325, 'epoch': 3} {'type': 'loss', 'content': 0.1113455668091774, 'timestamp': '2025-09-10 03:00:30.709636', 'step': 19326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:30.764282', 'step': 19326, 'epoch': 3} {'type': 'loss', 'content': 0.05392695963382721, 'timestamp': '2025-09-10 03:00:30.766286', 'step': 19327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:30.820509', 'step': 19327, 'epoch': 3} {'type': 'loss', 'content': 0.10102873295545578, 'timestamp': '2025-09-10 03:00:30.826705', 'step': 19328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:30.878852', 'step': 19328, 'epoch': 3} {'type': 'loss', 'content': 0.056187938898801804, 'timestamp': '2025-09-10 03:00:30.880941', 'step': 19329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:30.934374', 'step': 19329, 'epoch': 3} {'type': 'loss', 'content': 0.05828026682138443, 'timestamp': '2025-09-10 03:00:30.936658', 'step': 19330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:00:30.990491', 'step': 19330, 'epoch': 3} {'type': 'loss', 'content': 0.137534961104393, 'timestamp': '2025-09-10 03:00:30.992811', 'step': 19331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:00:31.045997', 'step': 19331, 'epoch': 3} {'type': 'loss', 'content': 0.09182330220937729, 'timestamp': '2025-09-10 03:00:31.052606', 'step': 19332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:00:31.106693', 'step': 19332, 'epoch': 3} {'type': 'loss', 'content': 0.1355358213186264, 'timestamp': '2025-09-10 03:00:31.108920', 'step': 19333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:31.164439', 'step': 19333, 'epoch': 3} {'type': 'loss', 'content': 0.2047376036643982, 'timestamp': '2025-09-10 03:00:31.166659', 'step': 19334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:31.220618', 'step': 19334, 'epoch': 3} {'type': 'loss', 'content': 0.017709646373987198, 'timestamp': '2025-09-10 03:00:31.222746', 'step': 19335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:31.276614', 'step': 19335, 'epoch': 3} {'type': 'loss', 'content': 0.09141869843006134, 'timestamp': '2025-09-10 03:00:31.283282', 'step': 19336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:31.336824', 'step': 19336, 'epoch': 3} {'type': 'loss', 'content': 0.0894203633069992, 'timestamp': '2025-09-10 03:00:31.338933', 'step': 19337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:31.392443', 'step': 19337, 'epoch': 3} {'type': 'loss', 'content': 0.10410518944263458, 'timestamp': '2025-09-10 03:00:31.394568', 'step': 19338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:31.448306', 'step': 19338, 'epoch': 3} {'type': 'loss', 'content': 0.1206139400601387, 'timestamp': '2025-09-10 03:00:31.450444', 'step': 19339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:31.504822', 'step': 19339, 'epoch': 3} {'type': 'loss', 'content': 0.12866629660129547, 'timestamp': '2025-09-10 03:00:31.510762', 'step': 19340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:31.565374', 'step': 19340, 'epoch': 3} {'type': 'loss', 'content': 0.11452005803585052, 'timestamp': '2025-09-10 03:00:31.567775', 'step': 19341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:00:31.622017', 'step': 19341, 'epoch': 3} {'type': 'loss', 'content': 0.14081329107284546, 'timestamp': '2025-09-10 03:00:31.624313', 'step': 19342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:31.678736', 'step': 19342, 'epoch': 3} {'type': 'loss', 'content': 0.1469295620918274, 'timestamp': '2025-09-10 03:00:31.680776', 'step': 19343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:31.734532', 'step': 19343, 'epoch': 3} {'type': 'loss', 'content': 0.11056658625602722, 'timestamp': '2025-09-10 03:00:31.740486', 'step': 19344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:31.795131', 'step': 19344, 'epoch': 3} {'type': 'loss', 'content': 0.08133518695831299, 'timestamp': '2025-09-10 03:00:31.797320', 'step': 19345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:31.851657', 'step': 19345, 'epoch': 3} {'type': 'loss', 'content': 0.15859727561473846, 'timestamp': '2025-09-10 03:00:31.853856', 'step': 19346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:31.907688', 'step': 19346, 'epoch': 3} {'type': 'loss', 'content': 0.07187405973672867, 'timestamp': '2025-09-10 03:00:31.909731', 'step': 19347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:31.965115', 'step': 19347, 'epoch': 3} {'type': 'loss', 'content': 0.03782949224114418, 'timestamp': '2025-09-10 03:00:31.971222', 'step': 19348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:32.024611', 'step': 19348, 'epoch': 3} {'type': 'loss', 'content': 0.13325470685958862, 'timestamp': '2025-09-10 03:00:32.026664', 'step': 19349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:32.080297', 'step': 19349, 'epoch': 3} {'type': 'loss', 'content': 0.05840630084276199, 'timestamp': '2025-09-10 03:00:32.082303', 'step': 19350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:32.135769', 'step': 19350, 'epoch': 3} {'type': 'loss', 'content': 0.07570651918649673, 'timestamp': '2025-09-10 03:00:32.137764', 'step': 19351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:32.192032', 'step': 19351, 'epoch': 3} {'type': 'loss', 'content': 0.11286164075136185, 'timestamp': '2025-09-10 03:00:32.198259', 'step': 19352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:32.251326', 'step': 19352, 'epoch': 3} {'type': 'loss', 'content': 0.053793758153915405, 'timestamp': '2025-09-10 03:00:32.253460', 'step': 19353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:32.306495', 'step': 19353, 'epoch': 3} {'type': 'loss', 'content': 0.09854759275913239, 'timestamp': '2025-09-10 03:00:32.308586', 'step': 19354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:32.363993', 'step': 19354, 'epoch': 3} {'type': 'loss', 'content': 0.19380219280719757, 'timestamp': '2025-09-10 03:00:32.366213', 'step': 19355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:32.420281', 'step': 19355, 'epoch': 3} {'type': 'loss', 'content': 0.041410140693187714, 'timestamp': '2025-09-10 03:00:32.426462', 'step': 19356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:32.479208', 'step': 19356, 'epoch': 3} {'type': 'loss', 'content': 0.13774815201759338, 'timestamp': '2025-09-10 03:00:32.481535', 'step': 19357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:32.534424', 'step': 19357, 'epoch': 3} {'type': 'loss', 'content': 0.05769999325275421, 'timestamp': '2025-09-10 03:00:32.536540', 'step': 19358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:32.590707', 'step': 19358, 'epoch': 3} {'type': 'loss', 'content': 0.07450594007968903, 'timestamp': '2025-09-10 03:00:32.592836', 'step': 19359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:32.646378', 'step': 19359, 'epoch': 3} {'type': 'loss', 'content': 0.0591127909719944, 'timestamp': '2025-09-10 03:00:32.652238', 'step': 19360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:32.705973', 'step': 19360, 'epoch': 3} {'type': 'loss', 'content': 0.14367379248142242, 'timestamp': '2025-09-10 03:00:32.708119', 'step': 19361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:32.762711', 'step': 19361, 'epoch': 3} {'type': 'loss', 'content': 0.11870022118091583, 'timestamp': '2025-09-10 03:00:32.764865', 'step': 19362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:32.820724', 'step': 19362, 'epoch': 3} {'type': 'loss', 'content': 0.07218876481056213, 'timestamp': '2025-09-10 03:00:32.822756', 'step': 19363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:32.876374', 'step': 19363, 'epoch': 3} {'type': 'loss', 'content': 0.1638469099998474, 'timestamp': '2025-09-10 03:00:32.882395', 'step': 19364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:32.935592', 'step': 19364, 'epoch': 3} {'type': 'loss', 'content': 0.1032058596611023, 'timestamp': '2025-09-10 03:00:32.937547', 'step': 19365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:32.990992', 'step': 19365, 'epoch': 3} {'type': 'loss', 'content': 0.08156108111143112, 'timestamp': '2025-09-10 03:00:32.993110', 'step': 19366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:33.046703', 'step': 19366, 'epoch': 3} {'type': 'loss', 'content': 0.06922341883182526, 'timestamp': '2025-09-10 03:00:33.048846', 'step': 19367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:33.102467', 'step': 19367, 'epoch': 3} {'type': 'loss', 'content': 0.06268078088760376, 'timestamp': '2025-09-10 03:00:33.108313', 'step': 19368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:33.160661', 'step': 19368, 'epoch': 3} {'type': 'loss', 'content': 0.07648418843746185, 'timestamp': '2025-09-10 03:00:33.162698', 'step': 19369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:00:33.215599', 'step': 19369, 'epoch': 3} {'type': 'loss', 'content': 0.13679829239845276, 'timestamp': '2025-09-10 03:00:33.217895', 'step': 19370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:33.272197', 'step': 19370, 'epoch': 3} {'type': 'loss', 'content': 0.11681573837995529, 'timestamp': '2025-09-10 03:00:33.274567', 'step': 19371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:00:33.327671', 'step': 19371, 'epoch': 3} {'type': 'loss', 'content': 0.0590510293841362, 'timestamp': '2025-09-10 03:00:33.333731', 'step': 19372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:33.386618', 'step': 19372, 'epoch': 3} {'type': 'loss', 'content': 0.09763524681329727, 'timestamp': '2025-09-10 03:00:33.389926', 'step': 19373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:33.444391', 'step': 19373, 'epoch': 3} {'type': 'loss', 'content': 0.08933226764202118, 'timestamp': '2025-09-10 03:00:33.446494', 'step': 19374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:33.499503', 'step': 19374, 'epoch': 3} {'type': 'loss', 'content': 0.1874382644891739, 'timestamp': '2025-09-10 03:00:33.501610', 'step': 19375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:33.555106', 'step': 19375, 'epoch': 3} {'type': 'loss', 'content': 0.05997571349143982, 'timestamp': '2025-09-10 03:00:33.561078', 'step': 19376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:33.614168', 'step': 19376, 'epoch': 3} {'type': 'loss', 'content': 0.06338448077440262, 'timestamp': '2025-09-10 03:00:33.616292', 'step': 19377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:33.670395', 'step': 19377, 'epoch': 3} {'type': 'loss', 'content': 0.10662161558866501, 'timestamp': '2025-09-10 03:00:33.672520', 'step': 19378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:33.725691', 'step': 19378, 'epoch': 3} {'type': 'loss', 'content': 0.05767522752285004, 'timestamp': '2025-09-10 03:00:33.727813', 'step': 19379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:33.781794', 'step': 19379, 'epoch': 3} {'type': 'loss', 'content': 0.13775677978992462, 'timestamp': '2025-09-10 03:00:33.787844', 'step': 19380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:33.840972', 'step': 19380, 'epoch': 3} {'type': 'loss', 'content': 0.09325675666332245, 'timestamp': '2025-09-10 03:00:33.843189', 'step': 19381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:33.896345', 'step': 19381, 'epoch': 3} {'type': 'loss', 'content': 0.06497395038604736, 'timestamp': '2025-09-10 03:00:33.898460', 'step': 19382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:33.952025', 'step': 19382, 'epoch': 3} {'type': 'loss', 'content': 0.1268409937620163, 'timestamp': '2025-09-10 03:00:33.954021', 'step': 19383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:34.007419', 'step': 19383, 'epoch': 3} {'type': 'loss', 'content': 0.12329331785440445, 'timestamp': '2025-09-10 03:00:34.013365', 'step': 19384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:34.066374', 'step': 19384, 'epoch': 3} {'type': 'loss', 'content': 0.0398913212120533, 'timestamp': '2025-09-10 03:00:34.068503', 'step': 19385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:34.121445', 'step': 19385, 'epoch': 3} {'type': 'loss', 'content': 0.1080511286854744, 'timestamp': '2025-09-10 03:00:34.123721', 'step': 19386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:34.177251', 'step': 19386, 'epoch': 3} {'type': 'loss', 'content': 0.11177822947502136, 'timestamp': '2025-09-10 03:00:34.179411', 'step': 19387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 03:00:34.233091', 'step': 19387, 'epoch': 3} {'type': 'loss', 'content': 0.06874307245016098, 'timestamp': '2025-09-10 03:00:34.238860', 'step': 19388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:34.291535', 'step': 19388, 'epoch': 3} {'type': 'loss', 'content': 0.03942115232348442, 'timestamp': '2025-09-10 03:00:34.293652', 'step': 19389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:34.347714', 'step': 19389, 'epoch': 3} {'type': 'loss', 'content': 0.07408677786588669, 'timestamp': '2025-09-10 03:00:34.349821', 'step': 19390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:34.403870', 'step': 19390, 'epoch': 3} {'type': 'loss', 'content': 0.12523435056209564, 'timestamp': '2025-09-10 03:00:34.406052', 'step': 19391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:34.458996', 'step': 19391, 'epoch': 3} {'type': 'loss', 'content': 0.0688554048538208, 'timestamp': '2025-09-10 03:00:34.465018', 'step': 19392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:34.525647', 'step': 19392, 'epoch': 3} {'type': 'loss', 'content': 0.15854832530021667, 'timestamp': '2025-09-10 03:00:34.527747', 'step': 19393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:00:34.586885', 'step': 19393, 'epoch': 3} {'type': 'loss', 'content': 0.032404620200395584, 'timestamp': '2025-09-10 03:00:34.590586', 'step': 19394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:34.644182', 'step': 19394, 'epoch': 3} {'type': 'loss', 'content': 0.10803267359733582, 'timestamp': '2025-09-10 03:00:34.646382', 'step': 19395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:34.700282', 'step': 19395, 'epoch': 3} {'type': 'loss', 'content': 0.08620954304933548, 'timestamp': '2025-09-10 03:00:34.706200', 'step': 19396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:34.759176', 'step': 19396, 'epoch': 3} {'type': 'loss', 'content': 0.0765753835439682, 'timestamp': '2025-09-10 03:00:34.761690', 'step': 19397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:34.818847', 'step': 19397, 'epoch': 3} {'type': 'loss', 'content': 0.06615430116653442, 'timestamp': '2025-09-10 03:00:34.821028', 'step': 19398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:34.875173', 'step': 19398, 'epoch': 3} {'type': 'loss', 'content': 0.04990190267562866, 'timestamp': '2025-09-10 03:00:34.877487', 'step': 19399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:34.952935', 'step': 19399, 'epoch': 3} {'type': 'loss', 'content': 0.11767024546861649, 'timestamp': '2025-09-10 03:00:34.958988', 'step': 19400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:35.013141', 'step': 19400, 'epoch': 3} {'type': 'loss', 'content': 0.08887150883674622, 'timestamp': '2025-09-10 03:00:35.015469', 'step': 19401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:35.070686', 'step': 19401, 'epoch': 3} {'type': 'loss', 'content': 0.046946026384830475, 'timestamp': '2025-09-10 03:00:35.075657', 'step': 19402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:35.129305', 'step': 19402, 'epoch': 3} {'type': 'loss', 'content': 0.05656643584370613, 'timestamp': '2025-09-10 03:00:35.131486', 'step': 19403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:35.187361', 'step': 19403, 'epoch': 3} {'type': 'loss', 'content': 0.09898630529642105, 'timestamp': '2025-09-10 03:00:35.199204', 'step': 19404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:35.256145', 'step': 19404, 'epoch': 3} {'type': 'loss', 'content': 0.034532226622104645, 'timestamp': '2025-09-10 03:00:35.258190', 'step': 19405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:35.313308', 'step': 19405, 'epoch': 3} {'type': 'loss', 'content': 0.12246938794851303, 'timestamp': '2025-09-10 03:00:35.315404', 'step': 19406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:35.369326', 'step': 19406, 'epoch': 3} {'type': 'loss', 'content': 0.09997542947530746, 'timestamp': '2025-09-10 03:00:35.371497', 'step': 19407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:35.427171', 'step': 19407, 'epoch': 3} {'type': 'loss', 'content': 0.07536725699901581, 'timestamp': '2025-09-10 03:00:35.433055', 'step': 19408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:35.486252', 'step': 19408, 'epoch': 3} {'type': 'loss', 'content': 0.05103730410337448, 'timestamp': '2025-09-10 03:00:35.488194', 'step': 19409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:00:35.541330', 'step': 19409, 'epoch': 3} {'type': 'loss', 'content': 0.08510980755090714, 'timestamp': '2025-09-10 03:00:35.543461', 'step': 19410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:35.597236', 'step': 19410, 'epoch': 3} {'type': 'loss', 'content': 0.06344027072191238, 'timestamp': '2025-09-10 03:00:35.599193', 'step': 19411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:35.651828', 'step': 19411, 'epoch': 3} {'type': 'loss', 'content': 0.05169808864593506, 'timestamp': '2025-09-10 03:00:35.657797', 'step': 19412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:35.711821', 'step': 19412, 'epoch': 3} {'type': 'loss', 'content': 0.1064823642373085, 'timestamp': '2025-09-10 03:00:35.714072', 'step': 19413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:35.767361', 'step': 19413, 'epoch': 3} {'type': 'loss', 'content': 0.07950076460838318, 'timestamp': '2025-09-10 03:00:35.769631', 'step': 19414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:35.823529', 'step': 19414, 'epoch': 3} {'type': 'loss', 'content': 0.10051168501377106, 'timestamp': '2025-09-10 03:00:35.825630', 'step': 19415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:35.879412', 'step': 19415, 'epoch': 3} {'type': 'loss', 'content': 0.09325350075960159, 'timestamp': '2025-09-10 03:00:35.885335', 'step': 19416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:35.940133', 'step': 19416, 'epoch': 3} {'type': 'loss', 'content': 0.17553222179412842, 'timestamp': '2025-09-10 03:00:35.942317', 'step': 19417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:35.996079', 'step': 19417, 'epoch': 3} {'type': 'loss', 'content': 0.13957253098487854, 'timestamp': '2025-09-10 03:00:35.998222', 'step': 19418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:36.052006', 'step': 19418, 'epoch': 3} {'type': 'loss', 'content': 0.1599089503288269, 'timestamp': '2025-09-10 03:00:36.054467', 'step': 19419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:36.107459', 'step': 19419, 'epoch': 3} {'type': 'loss', 'content': 0.028665004298090935, 'timestamp': '2025-09-10 03:00:36.113344', 'step': 19420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:36.166326', 'step': 19420, 'epoch': 3} {'type': 'loss', 'content': 0.11601913720369339, 'timestamp': '2025-09-10 03:00:36.168284', 'step': 19421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:36.222436', 'step': 19421, 'epoch': 3} {'type': 'loss', 'content': 0.0646614283323288, 'timestamp': '2025-09-10 03:00:36.224610', 'step': 19422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:36.277912', 'step': 19422, 'epoch': 3} {'type': 'loss', 'content': 0.15427348017692566, 'timestamp': '2025-09-10 03:00:36.280299', 'step': 19423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:36.333702', 'step': 19423, 'epoch': 3} {'type': 'loss', 'content': 0.1359635442495346, 'timestamp': '2025-09-10 03:00:36.339653', 'step': 19424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:36.392604', 'step': 19424, 'epoch': 3} {'type': 'loss', 'content': 0.11071476340293884, 'timestamp': '2025-09-10 03:00:36.395065', 'step': 19425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:36.449661', 'step': 19425, 'epoch': 3} {'type': 'loss', 'content': 0.06580854952335358, 'timestamp': '2025-09-10 03:00:36.451942', 'step': 19426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:36.506436', 'step': 19426, 'epoch': 3} {'type': 'loss', 'content': 0.15620188415050507, 'timestamp': '2025-09-10 03:00:36.508736', 'step': 19427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:36.562763', 'step': 19427, 'epoch': 3} {'type': 'loss', 'content': 0.11218436062335968, 'timestamp': '2025-09-10 03:00:36.568868', 'step': 19428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:36.621756', 'step': 19428, 'epoch': 3} {'type': 'loss', 'content': 0.09093041718006134, 'timestamp': '2025-09-10 03:00:36.623752', 'step': 19429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:36.677891', 'step': 19429, 'epoch': 3} {'type': 'loss', 'content': 0.12345092743635178, 'timestamp': '2025-09-10 03:00:36.680015', 'step': 19430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:36.732843', 'step': 19430, 'epoch': 3} {'type': 'loss', 'content': 0.109400175511837, 'timestamp': '2025-09-10 03:00:36.735001', 'step': 19431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:00:36.788423', 'step': 19431, 'epoch': 3} {'type': 'loss', 'content': 0.10509171336889267, 'timestamp': '2025-09-10 03:00:36.794319', 'step': 19432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:36.847217', 'step': 19432, 'epoch': 3} {'type': 'loss', 'content': 0.11406732350587845, 'timestamp': '2025-09-10 03:00:36.849331', 'step': 19433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:36.903356', 'step': 19433, 'epoch': 3} {'type': 'loss', 'content': 0.15608039498329163, 'timestamp': '2025-09-10 03:00:36.905520', 'step': 19434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:36.959800', 'step': 19434, 'epoch': 3} {'type': 'loss', 'content': 0.09400106966495514, 'timestamp': '2025-09-10 03:00:36.961981', 'step': 19435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:37.016036', 'step': 19435, 'epoch': 3} {'type': 'loss', 'content': 0.06565388292074203, 'timestamp': '2025-09-10 03:00:37.022053', 'step': 19436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:37.075751', 'step': 19436, 'epoch': 3} {'type': 'loss', 'content': 0.0804896131157875, 'timestamp': '2025-09-10 03:00:37.077776', 'step': 19437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:37.131660', 'step': 19437, 'epoch': 3} {'type': 'loss', 'content': 0.11552967131137848, 'timestamp': '2025-09-10 03:00:37.133644', 'step': 19438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:37.187481', 'step': 19438, 'epoch': 3} {'type': 'loss', 'content': 0.08881726861000061, 'timestamp': '2025-09-10 03:00:37.189864', 'step': 19439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:37.243056', 'step': 19439, 'epoch': 3} {'type': 'loss', 'content': 0.09735358506441116, 'timestamp': '2025-09-10 03:00:37.248873', 'step': 19440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:37.301143', 'step': 19440, 'epoch': 3} {'type': 'loss', 'content': 0.08236874639987946, 'timestamp': '2025-09-10 03:00:37.303324', 'step': 19441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:37.356733', 'step': 19441, 'epoch': 3} {'type': 'loss', 'content': 0.11718128621578217, 'timestamp': '2025-09-10 03:00:37.358954', 'step': 19442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:37.412897', 'step': 19442, 'epoch': 3} {'type': 'loss', 'content': 0.07923156768083572, 'timestamp': '2025-09-10 03:00:37.415226', 'step': 19443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:37.469690', 'step': 19443, 'epoch': 3} {'type': 'loss', 'content': 0.06595730036497116, 'timestamp': '2025-09-10 03:00:37.475572', 'step': 19444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:00:37.528104', 'step': 19444, 'epoch': 3} {'type': 'loss', 'content': 0.03202725574374199, 'timestamp': '2025-09-10 03:00:37.530229', 'step': 19445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:37.584910', 'step': 19445, 'epoch': 3} {'type': 'loss', 'content': 0.07652465254068375, 'timestamp': '2025-09-10 03:00:37.587183', 'step': 19446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:37.643244', 'step': 19446, 'epoch': 3} {'type': 'loss', 'content': 0.1042274534702301, 'timestamp': '2025-09-10 03:00:37.645424', 'step': 19447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:00:37.699691', 'step': 19447, 'epoch': 3} {'type': 'loss', 'content': 0.05206098407506943, 'timestamp': '2025-09-10 03:00:37.705613', 'step': 19448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:37.759326', 'step': 19448, 'epoch': 3} {'type': 'loss', 'content': 0.12617699801921844, 'timestamp': '2025-09-10 03:00:37.761483', 'step': 19449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:37.814610', 'step': 19449, 'epoch': 3} {'type': 'loss', 'content': 0.0964844599366188, 'timestamp': '2025-09-10 03:00:37.816790', 'step': 19450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:37.869967', 'step': 19450, 'epoch': 3} {'type': 'loss', 'content': 0.06490867584943771, 'timestamp': '2025-09-10 03:00:37.872118', 'step': 19451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:00:37.925803', 'step': 19451, 'epoch': 3} {'type': 'loss', 'content': 0.09260880202054977, 'timestamp': '2025-09-10 03:00:37.931820', 'step': 19452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:37.984659', 'step': 19452, 'epoch': 3} {'type': 'loss', 'content': 0.15295936167240143, 'timestamp': '2025-09-10 03:00:37.986830', 'step': 19453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:38.040353', 'step': 19453, 'epoch': 3} {'type': 'loss', 'content': 0.18755200505256653, 'timestamp': '2025-09-10 03:00:38.042509', 'step': 19454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:38.097638', 'step': 19454, 'epoch': 3} {'type': 'loss', 'content': 0.08077234774827957, 'timestamp': '2025-09-10 03:00:38.099920', 'step': 19455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:38.154409', 'step': 19455, 'epoch': 3} {'type': 'loss', 'content': 0.09998404234647751, 'timestamp': '2025-09-10 03:00:38.160712', 'step': 19456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:38.214839', 'step': 19456, 'epoch': 3} {'type': 'loss', 'content': 0.07318864017724991, 'timestamp': '2025-09-10 03:00:38.217088', 'step': 19457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:38.270796', 'step': 19457, 'epoch': 3} {'type': 'loss', 'content': 0.14819033443927765, 'timestamp': '2025-09-10 03:00:38.272789', 'step': 19458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:38.326337', 'step': 19458, 'epoch': 3} {'type': 'loss', 'content': 0.06150343269109726, 'timestamp': '2025-09-10 03:00:38.328584', 'step': 19459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:38.382710', 'step': 19459, 'epoch': 3} {'type': 'loss', 'content': 0.07761890441179276, 'timestamp': '2025-09-10 03:00:38.388930', 'step': 19460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:38.441898', 'step': 19460, 'epoch': 3} {'type': 'loss', 'content': 0.10956946015357971, 'timestamp': '2025-09-10 03:00:38.444058', 'step': 19461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:00:38.497196', 'step': 19461, 'epoch': 3} {'type': 'loss', 'content': 0.046182699501514435, 'timestamp': '2025-09-10 03:00:38.499379', 'step': 19462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:38.555417', 'step': 19462, 'epoch': 3} {'type': 'loss', 'content': 0.18411311507225037, 'timestamp': '2025-09-10 03:00:38.557569', 'step': 19463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:38.612762', 'step': 19463, 'epoch': 3} {'type': 'loss', 'content': 0.030273929238319397, 'timestamp': '2025-09-10 03:00:38.618993', 'step': 19464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:38.674397', 'step': 19464, 'epoch': 3} {'type': 'loss', 'content': 0.14150963723659515, 'timestamp': '2025-09-10 03:00:38.676498', 'step': 19465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:38.730627', 'step': 19465, 'epoch': 3} {'type': 'loss', 'content': 0.06479093432426453, 'timestamp': '2025-09-10 03:00:38.732759', 'step': 19466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:38.786351', 'step': 19466, 'epoch': 3} {'type': 'loss', 'content': 0.07763280719518661, 'timestamp': '2025-09-10 03:00:38.788468', 'step': 19467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:38.841914', 'step': 19467, 'epoch': 3} {'type': 'loss', 'content': 0.10866725444793701, 'timestamp': '2025-09-10 03:00:38.847957', 'step': 19468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:38.901125', 'step': 19468, 'epoch': 3} {'type': 'loss', 'content': 0.07687100023031235, 'timestamp': '2025-09-10 03:00:38.903324', 'step': 19469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:38.957801', 'step': 19469, 'epoch': 3} {'type': 'loss', 'content': 0.12140027433633804, 'timestamp': '2025-09-10 03:00:38.960047', 'step': 19470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:39.014204', 'step': 19470, 'epoch': 3} {'type': 'loss', 'content': 0.02184215560555458, 'timestamp': '2025-09-10 03:00:39.016411', 'step': 19471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:39.069604', 'step': 19471, 'epoch': 3} {'type': 'loss', 'content': 0.058160144835710526, 'timestamp': '2025-09-10 03:00:39.075494', 'step': 19472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:39.128278', 'step': 19472, 'epoch': 3} {'type': 'loss', 'content': 0.07689959555864334, 'timestamp': '2025-09-10 03:00:39.130321', 'step': 19473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:39.185708', 'step': 19473, 'epoch': 3} {'type': 'loss', 'content': 0.09924007952213287, 'timestamp': '2025-09-10 03:00:39.187736', 'step': 19474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:39.242540', 'step': 19474, 'epoch': 3} {'type': 'loss', 'content': 0.09341373294591904, 'timestamp': '2025-09-10 03:00:39.244474', 'step': 19475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:00:39.298620', 'step': 19475, 'epoch': 3} {'type': 'loss', 'content': 0.03465660288929939, 'timestamp': '2025-09-10 03:00:39.304600', 'step': 19476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:39.357973', 'step': 19476, 'epoch': 3} {'type': 'loss', 'content': 0.11078213900327682, 'timestamp': '2025-09-10 03:00:39.360133', 'step': 19477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:39.413843', 'step': 19477, 'epoch': 3} {'type': 'loss', 'content': 0.052795641124248505, 'timestamp': '2025-09-10 03:00:39.415945', 'step': 19478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:39.469773', 'step': 19478, 'epoch': 3} {'type': 'loss', 'content': 0.04893381521105766, 'timestamp': '2025-09-10 03:00:39.471929', 'step': 19479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:39.526682', 'step': 19479, 'epoch': 3} {'type': 'loss', 'content': 0.053121428936719894, 'timestamp': '2025-09-10 03:00:39.532741', 'step': 19480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:39.585969', 'step': 19480, 'epoch': 3} {'type': 'loss', 'content': 0.09803702682256699, 'timestamp': '2025-09-10 03:00:39.588108', 'step': 19481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:39.642540', 'step': 19481, 'epoch': 3} {'type': 'loss', 'content': 0.05799531191587448, 'timestamp': '2025-09-10 03:00:39.644779', 'step': 19482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:39.699871', 'step': 19482, 'epoch': 3} {'type': 'loss', 'content': 0.08466420322656631, 'timestamp': '2025-09-10 03:00:39.702023', 'step': 19483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:39.757345', 'step': 19483, 'epoch': 3} {'type': 'loss', 'content': 0.12595148384571075, 'timestamp': '2025-09-10 03:00:39.763500', 'step': 19484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:39.818984', 'step': 19484, 'epoch': 3} {'type': 'loss', 'content': 0.10571946948766708, 'timestamp': '2025-09-10 03:00:39.821217', 'step': 19485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:39.875867', 'step': 19485, 'epoch': 3} {'type': 'loss', 'content': 0.04048123583197594, 'timestamp': '2025-09-10 03:00:39.878274', 'step': 19486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:39.932317', 'step': 19486, 'epoch': 3} {'type': 'loss', 'content': 0.1257699877023697, 'timestamp': '2025-09-10 03:00:39.934441', 'step': 19487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:39.988344', 'step': 19487, 'epoch': 3} {'type': 'loss', 'content': 0.03650682047009468, 'timestamp': '2025-09-10 03:00:39.994305', 'step': 19488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:40.047880', 'step': 19488, 'epoch': 3} {'type': 'loss', 'content': 0.12096639722585678, 'timestamp': '2025-09-10 03:00:40.050111', 'step': 19489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:40.104370', 'step': 19489, 'epoch': 3} {'type': 'loss', 'content': 0.1359439492225647, 'timestamp': '2025-09-10 03:00:40.106494', 'step': 19490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:40.159950', 'step': 19490, 'epoch': 3} {'type': 'loss', 'content': 0.041268788278102875, 'timestamp': '2025-09-10 03:00:40.162152', 'step': 19491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:40.216149', 'step': 19491, 'epoch': 3} {'type': 'loss', 'content': 0.08214907348155975, 'timestamp': '2025-09-10 03:00:40.222415', 'step': 19492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:40.278213', 'step': 19492, 'epoch': 3} {'type': 'loss', 'content': 0.09471093118190765, 'timestamp': '2025-09-10 03:00:40.280316', 'step': 19493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:40.333990', 'step': 19493, 'epoch': 3} {'type': 'loss', 'content': 0.06497187912464142, 'timestamp': '2025-09-10 03:00:40.336199', 'step': 19494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:40.390835', 'step': 19494, 'epoch': 3} {'type': 'loss', 'content': 0.07847592234611511, 'timestamp': '2025-09-10 03:00:40.393035', 'step': 19495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:40.447603', 'step': 19495, 'epoch': 3} {'type': 'loss', 'content': 0.03471704199910164, 'timestamp': '2025-09-10 03:00:40.453693', 'step': 19496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:00:40.506526', 'step': 19496, 'epoch': 3} {'type': 'loss', 'content': 0.11143159866333008, 'timestamp': '2025-09-10 03:00:40.508728', 'step': 19497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:40.563731', 'step': 19497, 'epoch': 3} {'type': 'loss', 'content': 0.07826005667448044, 'timestamp': '2025-09-10 03:00:40.565876', 'step': 19498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:40.620135', 'step': 19498, 'epoch': 3} {'type': 'loss', 'content': 0.025660114362835884, 'timestamp': '2025-09-10 03:00:40.622306', 'step': 19499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:40.676211', 'step': 19499, 'epoch': 3} {'type': 'loss', 'content': 0.04917164146900177, 'timestamp': '2025-09-10 03:00:40.682316', 'step': 19500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 19500', 'timestamp': '2025-09-10 03:00:41.102425', 'step': 19500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:41.168694', 'step': 19500, 'epoch': 3} {'type': 'loss', 'content': 0.06645888090133667, 'timestamp': '2025-09-10 03:00:41.170989', 'step': 19501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:41.226194', 'step': 19501, 'epoch': 3} {'type': 'loss', 'content': 0.10121636837720871, 'timestamp': '2025-09-10 03:00:41.228538', 'step': 19502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:41.282881', 'step': 19502, 'epoch': 3} {'type': 'loss', 'content': 0.15059296786785126, 'timestamp': '2025-09-10 03:00:41.285195', 'step': 19503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:41.339697', 'step': 19503, 'epoch': 3} {'type': 'loss', 'content': 0.04041995480656624, 'timestamp': '2025-09-10 03:00:41.345870', 'step': 19504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:41.398869', 'step': 19504, 'epoch': 3} {'type': 'loss', 'content': 0.14615164697170258, 'timestamp': '2025-09-10 03:00:41.401180', 'step': 19505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:41.454207', 'step': 19505, 'epoch': 3} {'type': 'loss', 'content': 0.05390041694045067, 'timestamp': '2025-09-10 03:00:41.456357', 'step': 19506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:41.509822', 'step': 19506, 'epoch': 3} {'type': 'loss', 'content': 0.07047408074140549, 'timestamp': '2025-09-10 03:00:41.511899', 'step': 19507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:41.565529', 'step': 19507, 'epoch': 3} {'type': 'loss', 'content': 0.13810165226459503, 'timestamp': '2025-09-10 03:00:41.571679', 'step': 19508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:41.625126', 'step': 19508, 'epoch': 3} {'type': 'loss', 'content': 0.08272428810596466, 'timestamp': '2025-09-10 03:00:41.627262', 'step': 19509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:41.680559', 'step': 19509, 'epoch': 3} {'type': 'loss', 'content': 0.25985896587371826, 'timestamp': '2025-09-10 03:00:41.682738', 'step': 19510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:41.737383', 'step': 19510, 'epoch': 3} {'type': 'loss', 'content': 0.031701959669589996, 'timestamp': '2025-09-10 03:00:41.739458', 'step': 19511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:00:41.794882', 'step': 19511, 'epoch': 3} {'type': 'loss', 'content': 0.04644401744008064, 'timestamp': '2025-09-10 03:00:41.801108', 'step': 19512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:41.855664', 'step': 19512, 'epoch': 3} {'type': 'loss', 'content': 0.0538901761174202, 'timestamp': '2025-09-10 03:00:41.857929', 'step': 19513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:41.911629', 'step': 19513, 'epoch': 3} {'type': 'loss', 'content': 0.1761861890554428, 'timestamp': '2025-09-10 03:00:41.913898', 'step': 19514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:00:41.967865', 'step': 19514, 'epoch': 3} {'type': 'loss', 'content': 0.11150197684764862, 'timestamp': '2025-09-10 03:00:41.970073', 'step': 19515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:42.023444', 'step': 19515, 'epoch': 3} {'type': 'loss', 'content': 0.06441465765237808, 'timestamp': '2025-09-10 03:00:42.029348', 'step': 19516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:42.082441', 'step': 19516, 'epoch': 3} {'type': 'loss', 'content': 0.07229332625865936, 'timestamp': '2025-09-10 03:00:42.084553', 'step': 19517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:42.137747', 'step': 19517, 'epoch': 3} {'type': 'loss', 'content': 0.053711798042058945, 'timestamp': '2025-09-10 03:00:42.139904', 'step': 19518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:42.193936', 'step': 19518, 'epoch': 3} {'type': 'loss', 'content': 0.13172182440757751, 'timestamp': '2025-09-10 03:00:42.196113', 'step': 19519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:42.249683', 'step': 19519, 'epoch': 3} {'type': 'loss', 'content': 0.06299912184476852, 'timestamp': '2025-09-10 03:00:42.255555', 'step': 19520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:42.308910', 'step': 19520, 'epoch': 3} {'type': 'loss', 'content': 0.11154099553823471, 'timestamp': '2025-09-10 03:00:42.311057', 'step': 19521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:42.367211', 'step': 19521, 'epoch': 3} {'type': 'loss', 'content': 0.12467773258686066, 'timestamp': '2025-09-10 03:00:42.369396', 'step': 19522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:42.423939', 'step': 19522, 'epoch': 3} {'type': 'loss', 'content': 0.1301417350769043, 'timestamp': '2025-09-10 03:00:42.426123', 'step': 19523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:42.480542', 'step': 19523, 'epoch': 3} {'type': 'loss', 'content': 0.09024228900671005, 'timestamp': '2025-09-10 03:00:42.486531', 'step': 19524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:42.539897', 'step': 19524, 'epoch': 3} {'type': 'loss', 'content': 0.07412263751029968, 'timestamp': '2025-09-10 03:00:42.542111', 'step': 19525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:42.607711', 'step': 19525, 'epoch': 3} {'type': 'loss', 'content': 0.07739487290382385, 'timestamp': '2025-09-10 03:00:42.609891', 'step': 19526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:42.665631', 'step': 19526, 'epoch': 3} {'type': 'loss', 'content': 0.053850945085287094, 'timestamp': '2025-09-10 03:00:42.670607', 'step': 19527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:42.735323', 'step': 19527, 'epoch': 3} {'type': 'loss', 'content': 0.06593140214681625, 'timestamp': '2025-09-10 03:00:42.741367', 'step': 19528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:42.794082', 'step': 19528, 'epoch': 3} {'type': 'loss', 'content': 0.06330998986959457, 'timestamp': '2025-09-10 03:00:42.796283', 'step': 19529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:42.852810', 'step': 19529, 'epoch': 3} {'type': 'loss', 'content': 0.09353316575288773, 'timestamp': '2025-09-10 03:00:42.857189', 'step': 19530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:42.933017', 'step': 19530, 'epoch': 3} {'type': 'loss', 'content': 0.15351253747940063, 'timestamp': '2025-09-10 03:00:42.936595', 'step': 19531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:43.000425', 'step': 19531, 'epoch': 3} {'type': 'loss', 'content': 0.08292791247367859, 'timestamp': '2025-09-10 03:00:43.006490', 'step': 19532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:43.064068', 'step': 19532, 'epoch': 3} {'type': 'loss', 'content': 0.16863010823726654, 'timestamp': '2025-09-10 03:00:43.067087', 'step': 19533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:43.120623', 'step': 19533, 'epoch': 3} {'type': 'loss', 'content': 0.07498719543218613, 'timestamp': '2025-09-10 03:00:43.122835', 'step': 19534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:43.177857', 'step': 19534, 'epoch': 3} {'type': 'loss', 'content': 0.06554533541202545, 'timestamp': '2025-09-10 03:00:43.180225', 'step': 19535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:43.235371', 'step': 19535, 'epoch': 3} {'type': 'loss', 'content': 0.07802435010671616, 'timestamp': '2025-09-10 03:00:43.241487', 'step': 19536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:43.296971', 'step': 19536, 'epoch': 3} {'type': 'loss', 'content': 0.05045265704393387, 'timestamp': '2025-09-10 03:00:43.299108', 'step': 19537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:00:43.353172', 'step': 19537, 'epoch': 3} {'type': 'loss', 'content': 0.0301955658942461, 'timestamp': '2025-09-10 03:00:43.355292', 'step': 19538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:43.410877', 'step': 19538, 'epoch': 3} {'type': 'loss', 'content': 0.07067225873470306, 'timestamp': '2025-09-10 03:00:43.413188', 'step': 19539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:43.467720', 'step': 19539, 'epoch': 3} {'type': 'loss', 'content': 0.1692613959312439, 'timestamp': '2025-09-10 03:00:43.473911', 'step': 19540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:43.528079', 'step': 19540, 'epoch': 3} {'type': 'loss', 'content': 0.08993174880743027, 'timestamp': '2025-09-10 03:00:43.530430', 'step': 19541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:00:43.586114', 'step': 19541, 'epoch': 3} {'type': 'loss', 'content': 0.11298637092113495, 'timestamp': '2025-09-10 03:00:43.592573', 'step': 19542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:43.648356', 'step': 19542, 'epoch': 3} {'type': 'loss', 'content': 0.02613276243209839, 'timestamp': '2025-09-10 03:00:43.655009', 'step': 19543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:43.713865', 'step': 19543, 'epoch': 3} {'type': 'loss', 'content': 0.034041691571474075, 'timestamp': '2025-09-10 03:00:43.719855', 'step': 19544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:43.772638', 'step': 19544, 'epoch': 3} {'type': 'loss', 'content': 0.2090841829776764, 'timestamp': '2025-09-10 03:00:43.774862', 'step': 19545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:43.828153', 'step': 19545, 'epoch': 3} {'type': 'loss', 'content': 0.1260809749364853, 'timestamp': '2025-09-10 03:00:43.831535', 'step': 19546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:00:43.885598', 'step': 19546, 'epoch': 3} {'type': 'loss', 'content': 0.05211132392287254, 'timestamp': '2025-09-10 03:00:43.887729', 'step': 19547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:43.941571', 'step': 19547, 'epoch': 3} {'type': 'loss', 'content': 0.03742344677448273, 'timestamp': '2025-09-10 03:00:43.947613', 'step': 19548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:44.002984', 'step': 19548, 'epoch': 3} {'type': 'loss', 'content': 0.11431095749139786, 'timestamp': '2025-09-10 03:00:44.005017', 'step': 19549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:44.059581', 'step': 19549, 'epoch': 3} {'type': 'loss', 'content': 0.10867886245250702, 'timestamp': '2025-09-10 03:00:44.061770', 'step': 19550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:44.116246', 'step': 19550, 'epoch': 3} {'type': 'loss', 'content': 0.04009213298559189, 'timestamp': '2025-09-10 03:00:44.118467', 'step': 19551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:44.171643', 'step': 19551, 'epoch': 3} {'type': 'loss', 'content': 0.0691368579864502, 'timestamp': '2025-09-10 03:00:44.177414', 'step': 19552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:44.231407', 'step': 19552, 'epoch': 3} {'type': 'loss', 'content': 0.11421404778957367, 'timestamp': '2025-09-10 03:00:44.233625', 'step': 19553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:44.286840', 'step': 19553, 'epoch': 3} {'type': 'loss', 'content': 0.05949915573000908, 'timestamp': '2025-09-10 03:00:44.288977', 'step': 19554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:44.344128', 'step': 19554, 'epoch': 3} {'type': 'loss', 'content': 0.05262163281440735, 'timestamp': '2025-09-10 03:00:44.346096', 'step': 19555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:44.401382', 'step': 19555, 'epoch': 3} {'type': 'loss', 'content': 0.07568817585706711, 'timestamp': '2025-09-10 03:00:44.407651', 'step': 19556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:44.460701', 'step': 19556, 'epoch': 3} {'type': 'loss', 'content': 0.14275528490543365, 'timestamp': '2025-09-10 03:00:44.463119', 'step': 19557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:44.517698', 'step': 19557, 'epoch': 3} {'type': 'loss', 'content': 0.10727616399526596, 'timestamp': '2025-09-10 03:00:44.519905', 'step': 19558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:44.573971', 'step': 19558, 'epoch': 3} {'type': 'loss', 'content': 0.14952310919761658, 'timestamp': '2025-09-10 03:00:44.576134', 'step': 19559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:44.631081', 'step': 19559, 'epoch': 3} {'type': 'loss', 'content': 0.15451882779598236, 'timestamp': '2025-09-10 03:00:44.637252', 'step': 19560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:44.690551', 'step': 19560, 'epoch': 3} {'type': 'loss', 'content': 0.09525736421346664, 'timestamp': '2025-09-10 03:00:44.692668', 'step': 19561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:00:44.746827', 'step': 19561, 'epoch': 3} {'type': 'loss', 'content': 0.09737718105316162, 'timestamp': '2025-09-10 03:00:44.748995', 'step': 19562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:44.802742', 'step': 19562, 'epoch': 3} {'type': 'loss', 'content': 0.02687198668718338, 'timestamp': '2025-09-10 03:00:44.804898', 'step': 19563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:44.858079', 'step': 19563, 'epoch': 3} {'type': 'loss', 'content': 0.13631772994995117, 'timestamp': '2025-09-10 03:00:44.864027', 'step': 19564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:44.917245', 'step': 19564, 'epoch': 3} {'type': 'loss', 'content': 0.08978016674518585, 'timestamp': '2025-09-10 03:00:44.919466', 'step': 19565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:44.973240', 'step': 19565, 'epoch': 3} {'type': 'loss', 'content': 0.06049368903040886, 'timestamp': '2025-09-10 03:00:44.975482', 'step': 19566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:45.029206', 'step': 19566, 'epoch': 3} {'type': 'loss', 'content': 0.12270313501358032, 'timestamp': '2025-09-10 03:00:45.031556', 'step': 19567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:45.086496', 'step': 19567, 'epoch': 3} {'type': 'loss', 'content': 0.0727740004658699, 'timestamp': '2025-09-10 03:00:45.092704', 'step': 19568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:45.147425', 'step': 19568, 'epoch': 3} {'type': 'loss', 'content': 0.09557018429040909, 'timestamp': '2025-09-10 03:00:45.149641', 'step': 19569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:45.206238', 'step': 19569, 'epoch': 3} {'type': 'loss', 'content': 0.08820667117834091, 'timestamp': '2025-09-10 03:00:45.208534', 'step': 19570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:45.262302', 'step': 19570, 'epoch': 3} {'type': 'loss', 'content': 0.171577587723732, 'timestamp': '2025-09-10 03:00:45.264606', 'step': 19571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:45.317920', 'step': 19571, 'epoch': 3} {'type': 'loss', 'content': 0.0429873913526535, 'timestamp': '2025-09-10 03:00:45.325457', 'step': 19572, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 03:00:58.110994', 'step': 19572, 'epoch': 3} {'type': 'pplx', 'content': 10603.881867632837, 'timestamp': '2025-09-10 03:00:58.114114', 'step': 19572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:58.167380', 'step': 19572, 'epoch': 3} {'type': 'loss', 'content': 0.08863463997840881, 'timestamp': '2025-09-10 03:00:58.169481', 'step': 19573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:58.224326', 'step': 19573, 'epoch': 3} {'type': 'loss', 'content': 0.1715584248304367, 'timestamp': '2025-09-10 03:00:58.226823', 'step': 19574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:58.280182', 'step': 19574, 'epoch': 3} {'type': 'loss', 'content': 0.14924047887325287, 'timestamp': '2025-09-10 03:00:58.282448', 'step': 19575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:58.335940', 'step': 19575, 'epoch': 3} {'type': 'loss', 'content': 0.043066807091236115, 'timestamp': '2025-09-10 03:00:58.342046', 'step': 19576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:58.395657', 'step': 19576, 'epoch': 3} {'type': 'loss', 'content': 0.06881988793611526, 'timestamp': '2025-09-10 03:00:58.397768', 'step': 19577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:58.451867', 'step': 19577, 'epoch': 3} {'type': 'loss', 'content': 0.11730825901031494, 'timestamp': '2025-09-10 03:00:58.454073', 'step': 19578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:58.508169', 'step': 19578, 'epoch': 3} {'type': 'loss', 'content': 0.08285374939441681, 'timestamp': '2025-09-10 03:00:58.510569', 'step': 19579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:58.564374', 'step': 19579, 'epoch': 3} {'type': 'loss', 'content': 0.12269333004951477, 'timestamp': '2025-09-10 03:00:58.570657', 'step': 19580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:58.623790', 'step': 19580, 'epoch': 3} {'type': 'loss', 'content': 0.042297326028347015, 'timestamp': '2025-09-10 03:00:58.626066', 'step': 19581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:58.681100', 'step': 19581, 'epoch': 3} {'type': 'loss', 'content': 0.050902046263217926, 'timestamp': '2025-09-10 03:00:58.683284', 'step': 19582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:58.736980', 'step': 19582, 'epoch': 3} {'type': 'loss', 'content': 0.09124520421028137, 'timestamp': '2025-09-10 03:00:58.739302', 'step': 19583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:58.792996', 'step': 19583, 'epoch': 3} {'type': 'loss', 'content': 0.07951946556568146, 'timestamp': '2025-09-10 03:00:58.799334', 'step': 19584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:58.853264', 'step': 19584, 'epoch': 3} {'type': 'loss', 'content': 0.04893840104341507, 'timestamp': '2025-09-10 03:00:58.855793', 'step': 19585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:58.909863', 'step': 19585, 'epoch': 3} {'type': 'loss', 'content': 0.09191481024026871, 'timestamp': '2025-09-10 03:00:58.912018', 'step': 19586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:58.966563', 'step': 19586, 'epoch': 3} {'type': 'loss', 'content': 0.032645173370838165, 'timestamp': '2025-09-10 03:00:58.968734', 'step': 19587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:59.023078', 'step': 19587, 'epoch': 3} {'type': 'loss', 'content': 0.13774321973323822, 'timestamp': '2025-09-10 03:00:59.029174', 'step': 19588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:59.082783', 'step': 19588, 'epoch': 3} {'type': 'loss', 'content': 0.07074779272079468, 'timestamp': '2025-09-10 03:00:59.084959', 'step': 19589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:59.138527', 'step': 19589, 'epoch': 3} {'type': 'loss', 'content': 0.11003099381923676, 'timestamp': '2025-09-10 03:00:59.140677', 'step': 19590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:59.194433', 'step': 19590, 'epoch': 3} {'type': 'loss', 'content': 0.14402678608894348, 'timestamp': '2025-09-10 03:00:59.196594', 'step': 19591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:59.250165', 'step': 19591, 'epoch': 3} {'type': 'loss', 'content': 0.18623052537441254, 'timestamp': '2025-09-10 03:00:59.256149', 'step': 19592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:59.308984', 'step': 19592, 'epoch': 3} {'type': 'loss', 'content': 0.11276083439588547, 'timestamp': '2025-09-10 03:00:59.311282', 'step': 19593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:59.364826', 'step': 19593, 'epoch': 3} {'type': 'loss', 'content': 0.05093766748905182, 'timestamp': '2025-09-10 03:00:59.367059', 'step': 19594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:00:59.421223', 'step': 19594, 'epoch': 3} {'type': 'loss', 'content': 0.1433592587709427, 'timestamp': '2025-09-10 03:00:59.423432', 'step': 19595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:59.478111', 'step': 19595, 'epoch': 3} {'type': 'loss', 'content': 0.09232917428016663, 'timestamp': '2025-09-10 03:00:59.484352', 'step': 19596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:59.538409', 'step': 19596, 'epoch': 3} {'type': 'loss', 'content': 0.12652269005775452, 'timestamp': '2025-09-10 03:00:59.540479', 'step': 19597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:59.594643', 'step': 19597, 'epoch': 3} {'type': 'loss', 'content': 0.09919103980064392, 'timestamp': '2025-09-10 03:00:59.596938', 'step': 19598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:59.650146', 'step': 19598, 'epoch': 3} {'type': 'loss', 'content': 0.1596372425556183, 'timestamp': '2025-09-10 03:00:59.652405', 'step': 19599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:59.706170', 'step': 19599, 'epoch': 3} {'type': 'loss', 'content': 0.07764474302530289, 'timestamp': '2025-09-10 03:00:59.712257', 'step': 19600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:59.764912', 'step': 19600, 'epoch': 3} {'type': 'loss', 'content': 0.06363006681203842, 'timestamp': '2025-09-10 03:00:59.767079', 'step': 19601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:00:59.822242', 'step': 19601, 'epoch': 3} {'type': 'loss', 'content': 0.09164294600486755, 'timestamp': '2025-09-10 03:00:59.824447', 'step': 19602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:59.878856', 'step': 19602, 'epoch': 3} {'type': 'loss', 'content': 0.02862194925546646, 'timestamp': '2025-09-10 03:00:59.880993', 'step': 19603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:00:59.934806', 'step': 19603, 'epoch': 3} {'type': 'loss', 'content': 0.14951936900615692, 'timestamp': '2025-09-10 03:00:59.940670', 'step': 19604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:00:59.994064', 'step': 19604, 'epoch': 3} {'type': 'loss', 'content': 0.09206830710172653, 'timestamp': '2025-09-10 03:00:59.996224', 'step': 19605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:00.051307', 'step': 19605, 'epoch': 3} {'type': 'loss', 'content': 0.07885970175266266, 'timestamp': '2025-09-10 03:01:00.053462', 'step': 19606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:00.107497', 'step': 19606, 'epoch': 3} {'type': 'loss', 'content': 0.09067431092262268, 'timestamp': '2025-09-10 03:01:00.109716', 'step': 19607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:00.164081', 'step': 19607, 'epoch': 3} {'type': 'loss', 'content': 0.09293677657842636, 'timestamp': '2025-09-10 03:01:00.170111', 'step': 19608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:00.223300', 'step': 19608, 'epoch': 3} {'type': 'loss', 'content': 0.027047174051404, 'timestamp': '2025-09-10 03:01:00.225704', 'step': 19609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:00.280393', 'step': 19609, 'epoch': 3} {'type': 'loss', 'content': 0.038880083709955215, 'timestamp': '2025-09-10 03:01:00.282574', 'step': 19610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:00.337288', 'step': 19610, 'epoch': 3} {'type': 'loss', 'content': 0.10365787893533707, 'timestamp': '2025-09-10 03:01:00.339461', 'step': 19611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:00.394403', 'step': 19611, 'epoch': 3} {'type': 'loss', 'content': 0.05334193632006645, 'timestamp': '2025-09-10 03:01:00.400269', 'step': 19612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:00.454864', 'step': 19612, 'epoch': 3} {'type': 'loss', 'content': 0.04466162249445915, 'timestamp': '2025-09-10 03:01:00.457010', 'step': 19613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:00.511248', 'step': 19613, 'epoch': 3} {'type': 'loss', 'content': 0.1218419224023819, 'timestamp': '2025-09-10 03:01:00.513446', 'step': 19614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:00.568021', 'step': 19614, 'epoch': 3} {'type': 'loss', 'content': 0.08344965428113937, 'timestamp': '2025-09-10 03:01:00.570286', 'step': 19615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:01:00.624719', 'step': 19615, 'epoch': 3} {'type': 'loss', 'content': 0.0695861354470253, 'timestamp': '2025-09-10 03:01:00.630662', 'step': 19616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:00.732808', 'step': 19616, 'epoch': 3} {'type': 'loss', 'content': 0.06983660161495209, 'timestamp': '2025-09-10 03:01:00.735006', 'step': 19617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:00.798127', 'step': 19617, 'epoch': 3} {'type': 'loss', 'content': 0.16409659385681152, 'timestamp': '2025-09-10 03:01:00.800418', 'step': 19618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:00.856672', 'step': 19618, 'epoch': 3} {'type': 'loss', 'content': 0.10966093838214874, 'timestamp': '2025-09-10 03:01:00.858843', 'step': 19619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:00.949843', 'step': 19619, 'epoch': 3} {'type': 'loss', 'content': 0.23655885457992554, 'timestamp': '2025-09-10 03:01:00.955748', 'step': 19620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:01.014891', 'step': 19620, 'epoch': 3} {'type': 'loss', 'content': 0.09842323511838913, 'timestamp': '2025-09-10 03:01:01.017027', 'step': 19621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:01.078702', 'step': 19621, 'epoch': 3} {'type': 'loss', 'content': 0.0885339230298996, 'timestamp': '2025-09-10 03:01:01.081129', 'step': 19622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:01.136540', 'step': 19622, 'epoch': 3} {'type': 'loss', 'content': 0.06071573868393898, 'timestamp': '2025-09-10 03:01:01.138801', 'step': 19623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:01.192552', 'step': 19623, 'epoch': 3} {'type': 'loss', 'content': 0.06256666034460068, 'timestamp': '2025-09-10 03:01:01.198869', 'step': 19624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:01.255049', 'step': 19624, 'epoch': 3} {'type': 'loss', 'content': 0.07149352133274078, 'timestamp': '2025-09-10 03:01:01.257284', 'step': 19625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:01.312455', 'step': 19625, 'epoch': 3} {'type': 'loss', 'content': 0.08603221923112869, 'timestamp': '2025-09-10 03:01:01.314737', 'step': 19626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:01.371363', 'step': 19626, 'epoch': 3} {'type': 'loss', 'content': 0.05068352445960045, 'timestamp': '2025-09-10 03:01:01.373616', 'step': 19627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:01.428003', 'step': 19627, 'epoch': 3} {'type': 'loss', 'content': 0.07362229377031326, 'timestamp': '2025-09-10 03:01:01.434316', 'step': 19628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:01:01.487768', 'step': 19628, 'epoch': 3} {'type': 'loss', 'content': 0.045727215707302094, 'timestamp': '2025-09-10 03:01:01.489965', 'step': 19629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:01.544128', 'step': 19629, 'epoch': 3} {'type': 'loss', 'content': 0.05621149763464928, 'timestamp': '2025-09-10 03:01:01.546401', 'step': 19630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:01.600293', 'step': 19630, 'epoch': 3} {'type': 'loss', 'content': 0.11533753573894501, 'timestamp': '2025-09-10 03:01:01.602644', 'step': 19631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:01.658112', 'step': 19631, 'epoch': 3} {'type': 'loss', 'content': 0.08485258370637894, 'timestamp': '2025-09-10 03:01:01.664135', 'step': 19632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:01.726444', 'step': 19632, 'epoch': 3} {'type': 'loss', 'content': 0.14630919694900513, 'timestamp': '2025-09-10 03:01:01.728659', 'step': 19633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:01.783969', 'step': 19633, 'epoch': 3} {'type': 'loss', 'content': 0.26706942915916443, 'timestamp': '2025-09-10 03:01:01.786199', 'step': 19634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:01.842268', 'step': 19634, 'epoch': 3} {'type': 'loss', 'content': 0.15646204352378845, 'timestamp': '2025-09-10 03:01:01.849348', 'step': 19635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:01.910269', 'step': 19635, 'epoch': 3} {'type': 'loss', 'content': 0.13880355656147003, 'timestamp': '2025-09-10 03:01:01.916559', 'step': 19636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:01.971139', 'step': 19636, 'epoch': 3} {'type': 'loss', 'content': 0.09663713723421097, 'timestamp': '2025-09-10 03:01:01.973468', 'step': 19637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:02.027858', 'step': 19637, 'epoch': 3} {'type': 'loss', 'content': 0.05470828711986542, 'timestamp': '2025-09-10 03:01:02.030182', 'step': 19638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:02.088127', 'step': 19638, 'epoch': 3} {'type': 'loss', 'content': 0.0819617509841919, 'timestamp': '2025-09-10 03:01:02.090327', 'step': 19639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:02.153331', 'step': 19639, 'epoch': 3} {'type': 'loss', 'content': 0.041114307940006256, 'timestamp': '2025-09-10 03:01:02.159997', 'step': 19640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:02.213485', 'step': 19640, 'epoch': 3} {'type': 'loss', 'content': 0.15863822400569916, 'timestamp': '2025-09-10 03:01:02.215653', 'step': 19641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:02.269863', 'step': 19641, 'epoch': 3} {'type': 'loss', 'content': 0.0716419517993927, 'timestamp': '2025-09-10 03:01:02.272838', 'step': 19642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:02.329061', 'step': 19642, 'epoch': 3} {'type': 'loss', 'content': 0.058637749403715134, 'timestamp': '2025-09-10 03:01:02.332729', 'step': 19643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:02.397653', 'step': 19643, 'epoch': 3} {'type': 'loss', 'content': 0.02769654616713524, 'timestamp': '2025-09-10 03:01:02.403734', 'step': 19644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:02.457769', 'step': 19644, 'epoch': 3} {'type': 'loss', 'content': 0.05333017557859421, 'timestamp': '2025-09-10 03:01:02.465796', 'step': 19645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:02.532427', 'step': 19645, 'epoch': 3} {'type': 'loss', 'content': 0.22894203662872314, 'timestamp': '2025-09-10 03:01:02.534858', 'step': 19646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:02.588333', 'step': 19646, 'epoch': 3} {'type': 'loss', 'content': 0.14414654672145844, 'timestamp': '2025-09-10 03:01:02.590497', 'step': 19647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:02.652569', 'step': 19647, 'epoch': 3} {'type': 'loss', 'content': 0.1383274346590042, 'timestamp': '2025-09-10 03:01:02.658482', 'step': 19648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:02.711444', 'step': 19648, 'epoch': 3} {'type': 'loss', 'content': 0.04206068068742752, 'timestamp': '2025-09-10 03:01:02.713643', 'step': 19649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:02.768623', 'step': 19649, 'epoch': 3} {'type': 'loss', 'content': 0.025471383705735207, 'timestamp': '2025-09-10 03:01:02.770852', 'step': 19650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:02.824166', 'step': 19650, 'epoch': 3} {'type': 'loss', 'content': 0.12001902610063553, 'timestamp': '2025-09-10 03:01:02.826479', 'step': 19651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:02.880826', 'step': 19651, 'epoch': 3} {'type': 'loss', 'content': 0.05001575127243996, 'timestamp': '2025-09-10 03:01:02.887113', 'step': 19652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:02.940516', 'step': 19652, 'epoch': 3} {'type': 'loss', 'content': 0.10522344708442688, 'timestamp': '2025-09-10 03:01:02.942754', 'step': 19653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:02.997095', 'step': 19653, 'epoch': 3} {'type': 'loss', 'content': 0.06202065199613571, 'timestamp': '2025-09-10 03:01:02.999295', 'step': 19654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:03.053721', 'step': 19654, 'epoch': 3} {'type': 'loss', 'content': 0.15852095186710358, 'timestamp': '2025-09-10 03:01:03.055909', 'step': 19655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:03.109616', 'step': 19655, 'epoch': 3} {'type': 'loss', 'content': 0.10594868659973145, 'timestamp': '2025-09-10 03:01:03.115567', 'step': 19656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:03.168893', 'step': 19656, 'epoch': 3} {'type': 'loss', 'content': 0.030799150466918945, 'timestamp': '2025-09-10 03:01:03.171068', 'step': 19657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:03.224049', 'step': 19657, 'epoch': 3} {'type': 'loss', 'content': 0.08671075105667114, 'timestamp': '2025-09-10 03:01:03.226197', 'step': 19658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:03.279983', 'step': 19658, 'epoch': 3} {'type': 'loss', 'content': 0.03927828371524811, 'timestamp': '2025-09-10 03:01:03.282183', 'step': 19659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:03.336756', 'step': 19659, 'epoch': 3} {'type': 'loss', 'content': 0.08045938611030579, 'timestamp': '2025-09-10 03:01:03.342645', 'step': 19660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:03.396751', 'step': 19660, 'epoch': 3} {'type': 'loss', 'content': 0.15432217717170715, 'timestamp': '2025-09-10 03:01:03.398942', 'step': 19661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:03.453966', 'step': 19661, 'epoch': 3} {'type': 'loss', 'content': 0.048007719218730927, 'timestamp': '2025-09-10 03:01:03.456204', 'step': 19662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:03.510199', 'step': 19662, 'epoch': 3} {'type': 'loss', 'content': 0.08051131665706635, 'timestamp': '2025-09-10 03:01:03.512350', 'step': 19663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:03.565926', 'step': 19663, 'epoch': 3} {'type': 'loss', 'content': 0.01332931686192751, 'timestamp': '2025-09-10 03:01:03.572017', 'step': 19664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:03.625207', 'step': 19664, 'epoch': 3} {'type': 'loss', 'content': 0.06293222308158875, 'timestamp': '2025-09-10 03:01:03.627569', 'step': 19665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:03.683302', 'step': 19665, 'epoch': 3} {'type': 'loss', 'content': 0.23208171129226685, 'timestamp': '2025-09-10 03:01:03.685749', 'step': 19666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:03.739926', 'step': 19666, 'epoch': 3} {'type': 'loss', 'content': 0.06818846613168716, 'timestamp': '2025-09-10 03:01:03.742198', 'step': 19667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:03.795639', 'step': 19667, 'epoch': 3} {'type': 'loss', 'content': 0.036107681691646576, 'timestamp': '2025-09-10 03:01:03.801790', 'step': 19668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:03.860405', 'step': 19668, 'epoch': 3} {'type': 'loss', 'content': 0.09774024784564972, 'timestamp': '2025-09-10 03:01:03.862619', 'step': 19669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:03.916566', 'step': 19669, 'epoch': 3} {'type': 'loss', 'content': 0.0251171812415123, 'timestamp': '2025-09-10 03:01:03.918886', 'step': 19670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:03.974104', 'step': 19670, 'epoch': 3} {'type': 'loss', 'content': 0.1347007304430008, 'timestamp': '2025-09-10 03:01:03.976468', 'step': 19671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:04.031284', 'step': 19671, 'epoch': 3} {'type': 'loss', 'content': 0.12631112337112427, 'timestamp': '2025-09-10 03:01:04.037390', 'step': 19672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:04.090589', 'step': 19672, 'epoch': 3} {'type': 'loss', 'content': 0.14151431620121002, 'timestamp': '2025-09-10 03:01:04.092817', 'step': 19673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:04.147927', 'step': 19673, 'epoch': 3} {'type': 'loss', 'content': 0.12307322025299072, 'timestamp': '2025-09-10 03:01:04.150157', 'step': 19674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:04.204461', 'step': 19674, 'epoch': 3} {'type': 'loss', 'content': 0.1150398850440979, 'timestamp': '2025-09-10 03:01:04.207721', 'step': 19675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:04.262112', 'step': 19675, 'epoch': 3} {'type': 'loss', 'content': 0.12205680459737778, 'timestamp': '2025-09-10 03:01:04.268181', 'step': 19676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:04.321284', 'step': 19676, 'epoch': 3} {'type': 'loss', 'content': 0.07759983837604523, 'timestamp': '2025-09-10 03:01:04.323440', 'step': 19677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:04.377140', 'step': 19677, 'epoch': 3} {'type': 'loss', 'content': 0.08006276190280914, 'timestamp': '2025-09-10 03:01:04.379403', 'step': 19678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:04.432663', 'step': 19678, 'epoch': 3} {'type': 'loss', 'content': 0.10579848289489746, 'timestamp': '2025-09-10 03:01:04.434750', 'step': 19679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:04.490953', 'step': 19679, 'epoch': 3} {'type': 'loss', 'content': 0.037457216531038284, 'timestamp': '2025-09-10 03:01:04.497185', 'step': 19680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:04.550560', 'step': 19680, 'epoch': 3} {'type': 'loss', 'content': 0.10673247277736664, 'timestamp': '2025-09-10 03:01:04.552694', 'step': 19681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:04.606332', 'step': 19681, 'epoch': 3} {'type': 'loss', 'content': 0.021350350230932236, 'timestamp': '2025-09-10 03:01:04.608517', 'step': 19682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:04.662340', 'step': 19682, 'epoch': 3} {'type': 'loss', 'content': 0.023288309574127197, 'timestamp': '2025-09-10 03:01:04.664375', 'step': 19683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:04.717432', 'step': 19683, 'epoch': 3} {'type': 'loss', 'content': 0.07616710662841797, 'timestamp': '2025-09-10 03:01:04.723576', 'step': 19684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:04.776864', 'step': 19684, 'epoch': 3} {'type': 'loss', 'content': 0.07092823088169098, 'timestamp': '2025-09-10 03:01:04.779181', 'step': 19685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:04.832162', 'step': 19685, 'epoch': 3} {'type': 'loss', 'content': 0.2289089411497116, 'timestamp': '2025-09-10 03:01:04.834658', 'step': 19686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:04.888609', 'step': 19686, 'epoch': 3} {'type': 'loss', 'content': 0.06135082244873047, 'timestamp': '2025-09-10 03:01:04.890798', 'step': 19687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:04.944516', 'step': 19687, 'epoch': 3} {'type': 'loss', 'content': 0.10181156545877457, 'timestamp': '2025-09-10 03:01:04.950417', 'step': 19688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:05.003868', 'step': 19688, 'epoch': 3} {'type': 'loss', 'content': 0.08580585569143295, 'timestamp': '2025-09-10 03:01:05.005984', 'step': 19689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:05.060346', 'step': 19689, 'epoch': 3} {'type': 'loss', 'content': 0.11239995807409286, 'timestamp': '2025-09-10 03:01:05.062510', 'step': 19690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:05.116181', 'step': 19690, 'epoch': 3} {'type': 'loss', 'content': 0.041849128901958466, 'timestamp': '2025-09-10 03:01:05.118220', 'step': 19691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:05.171790', 'step': 19691, 'epoch': 3} {'type': 'loss', 'content': 0.04598814621567726, 'timestamp': '2025-09-10 03:01:05.177710', 'step': 19692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:05.230842', 'step': 19692, 'epoch': 3} {'type': 'loss', 'content': 0.15491348505020142, 'timestamp': '2025-09-10 03:01:05.233056', 'step': 19693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:05.286311', 'step': 19693, 'epoch': 3} {'type': 'loss', 'content': 0.13180813193321228, 'timestamp': '2025-09-10 03:01:05.288335', 'step': 19694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:05.341775', 'step': 19694, 'epoch': 3} {'type': 'loss', 'content': 0.14350861310958862, 'timestamp': '2025-09-10 03:01:05.344374', 'step': 19695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:05.397881', 'step': 19695, 'epoch': 3} {'type': 'loss', 'content': 0.05169292539358139, 'timestamp': '2025-09-10 03:01:05.403631', 'step': 19696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:05.456810', 'step': 19696, 'epoch': 3} {'type': 'loss', 'content': 0.056712206453084946, 'timestamp': '2025-09-10 03:01:05.459181', 'step': 19697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:05.512840', 'step': 19697, 'epoch': 3} {'type': 'loss', 'content': 0.03957417979836464, 'timestamp': '2025-09-10 03:01:05.514975', 'step': 19698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:05.569530', 'step': 19698, 'epoch': 3} {'type': 'loss', 'content': 0.08956063538789749, 'timestamp': '2025-09-10 03:01:05.571695', 'step': 19699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:05.627711', 'step': 19699, 'epoch': 3} {'type': 'loss', 'content': 0.05238858982920647, 'timestamp': '2025-09-10 03:01:05.633590', 'step': 19700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:05.685855', 'step': 19700, 'epoch': 3} {'type': 'loss', 'content': 0.1714494824409485, 'timestamp': '2025-09-10 03:01:05.687990', 'step': 19701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:05.741697', 'step': 19701, 'epoch': 3} {'type': 'loss', 'content': 0.15007717907428741, 'timestamp': '2025-09-10 03:01:05.743877', 'step': 19702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:05.797764', 'step': 19702, 'epoch': 3} {'type': 'loss', 'content': 0.07282798737287521, 'timestamp': '2025-09-10 03:01:05.799919', 'step': 19703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:05.852815', 'step': 19703, 'epoch': 3} {'type': 'loss', 'content': 0.2336670160293579, 'timestamp': '2025-09-10 03:01:05.858675', 'step': 19704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:05.910836', 'step': 19704, 'epoch': 3} {'type': 'loss', 'content': 0.05946985259652138, 'timestamp': '2025-09-10 03:01:05.912890', 'step': 19705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:05.966419', 'step': 19705, 'epoch': 3} {'type': 'loss', 'content': 0.06862761080265045, 'timestamp': '2025-09-10 03:01:05.968578', 'step': 19706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:06.023694', 'step': 19706, 'epoch': 3} {'type': 'loss', 'content': 0.07249926030635834, 'timestamp': '2025-09-10 03:01:06.025836', 'step': 19707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:06.081397', 'step': 19707, 'epoch': 3} {'type': 'loss', 'content': 0.04995354637503624, 'timestamp': '2025-09-10 03:01:06.087479', 'step': 19708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:06.140330', 'step': 19708, 'epoch': 3} {'type': 'loss', 'content': 0.08039850741624832, 'timestamp': '2025-09-10 03:01:06.142597', 'step': 19709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:06.196588', 'step': 19709, 'epoch': 3} {'type': 'loss', 'content': 0.07797499746084213, 'timestamp': '2025-09-10 03:01:06.198743', 'step': 19710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:06.251914', 'step': 19710, 'epoch': 3} {'type': 'loss', 'content': 0.07225825637578964, 'timestamp': '2025-09-10 03:01:06.254184', 'step': 19711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:06.306901', 'step': 19711, 'epoch': 3} {'type': 'loss', 'content': 0.17591755092144012, 'timestamp': '2025-09-10 03:01:06.312771', 'step': 19712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:06.365479', 'step': 19712, 'epoch': 3} {'type': 'loss', 'content': 0.1536879539489746, 'timestamp': '2025-09-10 03:01:06.367883', 'step': 19713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:06.422333', 'step': 19713, 'epoch': 3} {'type': 'loss', 'content': 0.1101844310760498, 'timestamp': '2025-09-10 03:01:06.424612', 'step': 19714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:06.478140', 'step': 19714, 'epoch': 3} {'type': 'loss', 'content': 0.09902992099523544, 'timestamp': '2025-09-10 03:01:06.480299', 'step': 19715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:06.533259', 'step': 19715, 'epoch': 3} {'type': 'loss', 'content': 0.04500294104218483, 'timestamp': '2025-09-10 03:01:06.539089', 'step': 19716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:06.591931', 'step': 19716, 'epoch': 3} {'type': 'loss', 'content': 0.08937903493642807, 'timestamp': '2025-09-10 03:01:06.594489', 'step': 19717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:06.649429', 'step': 19717, 'epoch': 3} {'type': 'loss', 'content': 0.1206633523106575, 'timestamp': '2025-09-10 03:01:06.651554', 'step': 19718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:06.705332', 'step': 19718, 'epoch': 3} {'type': 'loss', 'content': 0.06637439131736755, 'timestamp': '2025-09-10 03:01:06.707531', 'step': 19719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:06.760641', 'step': 19719, 'epoch': 3} {'type': 'loss', 'content': 0.07461624592542648, 'timestamp': '2025-09-10 03:01:06.766706', 'step': 19720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:06.820326', 'step': 19720, 'epoch': 3} {'type': 'loss', 'content': 0.05164501443505287, 'timestamp': '2025-09-10 03:01:06.822461', 'step': 19721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:06.875830', 'step': 19721, 'epoch': 3} {'type': 'loss', 'content': 0.10752952098846436, 'timestamp': '2025-09-10 03:01:06.878063', 'step': 19722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:06.931514', 'step': 19722, 'epoch': 3} {'type': 'loss', 'content': 0.10298828035593033, 'timestamp': '2025-09-10 03:01:06.933741', 'step': 19723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:06.986783', 'step': 19723, 'epoch': 3} {'type': 'loss', 'content': 0.1260562688112259, 'timestamp': '2025-09-10 03:01:06.992552', 'step': 19724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:07.047400', 'step': 19724, 'epoch': 3} {'type': 'loss', 'content': 0.19464069604873657, 'timestamp': '2025-09-10 03:01:07.049554', 'step': 19725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:07.102328', 'step': 19725, 'epoch': 3} {'type': 'loss', 'content': 0.08680202811956406, 'timestamp': '2025-09-10 03:01:07.104488', 'step': 19726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:07.159791', 'step': 19726, 'epoch': 3} {'type': 'loss', 'content': 0.0634882003068924, 'timestamp': '2025-09-10 03:01:07.161971', 'step': 19727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:07.215878', 'step': 19727, 'epoch': 3} {'type': 'loss', 'content': 0.11248669773340225, 'timestamp': '2025-09-10 03:01:07.221858', 'step': 19728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:07.274590', 'step': 19728, 'epoch': 3} {'type': 'loss', 'content': 0.0679253488779068, 'timestamp': '2025-09-10 03:01:07.276718', 'step': 19729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:01:07.330221', 'step': 19729, 'epoch': 3} {'type': 'loss', 'content': 0.13407695293426514, 'timestamp': '2025-09-10 03:01:07.332337', 'step': 19730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:07.392231', 'step': 19730, 'epoch': 3} {'type': 'loss', 'content': 0.05481257289648056, 'timestamp': '2025-09-10 03:01:07.394387', 'step': 19731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:07.447946', 'step': 19731, 'epoch': 3} {'type': 'loss', 'content': 0.11023872345685959, 'timestamp': '2025-09-10 03:01:07.453953', 'step': 19732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:07.506082', 'step': 19732, 'epoch': 3} {'type': 'loss', 'content': 0.12219858914613724, 'timestamp': '2025-09-10 03:01:07.508205', 'step': 19733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:07.561522', 'step': 19733, 'epoch': 3} {'type': 'loss', 'content': 0.09193910658359528, 'timestamp': '2025-09-10 03:01:07.563652', 'step': 19734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:07.616503', 'step': 19734, 'epoch': 3} {'type': 'loss', 'content': 0.057141002267599106, 'timestamp': '2025-09-10 03:01:07.618566', 'step': 19735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:07.672726', 'step': 19735, 'epoch': 3} {'type': 'loss', 'content': 0.06861493736505508, 'timestamp': '2025-09-10 03:01:07.678735', 'step': 19736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:07.732688', 'step': 19736, 'epoch': 3} {'type': 'loss', 'content': 0.05411459505558014, 'timestamp': '2025-09-10 03:01:07.734898', 'step': 19737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:07.788604', 'step': 19737, 'epoch': 3} {'type': 'loss', 'content': 0.0609104260802269, 'timestamp': '2025-09-10 03:01:07.790987', 'step': 19738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:07.843790', 'step': 19738, 'epoch': 3} {'type': 'loss', 'content': 0.045865125954151154, 'timestamp': '2025-09-10 03:01:07.845959', 'step': 19739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:07.899205', 'step': 19739, 'epoch': 3} {'type': 'loss', 'content': 0.05430886149406433, 'timestamp': '2025-09-10 03:01:07.905007', 'step': 19740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:07.958002', 'step': 19740, 'epoch': 3} {'type': 'loss', 'content': 0.021809974685311317, 'timestamp': '2025-09-10 03:01:07.960015', 'step': 19741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:08.013856', 'step': 19741, 'epoch': 3} {'type': 'loss', 'content': 0.1454494148492813, 'timestamp': '2025-09-10 03:01:08.015962', 'step': 19742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:08.069619', 'step': 19742, 'epoch': 3} {'type': 'loss', 'content': 0.025690848007798195, 'timestamp': '2025-09-10 03:01:08.071822', 'step': 19743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:08.124993', 'step': 19743, 'epoch': 3} {'type': 'loss', 'content': 0.045003268867731094, 'timestamp': '2025-09-10 03:01:08.130752', 'step': 19744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:08.184101', 'step': 19744, 'epoch': 3} {'type': 'loss', 'content': 0.049766939133405685, 'timestamp': '2025-09-10 03:01:08.186224', 'step': 19745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:08.243414', 'step': 19745, 'epoch': 3} {'type': 'loss', 'content': 0.06176199018955231, 'timestamp': '2025-09-10 03:01:08.245586', 'step': 19746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:08.299714', 'step': 19746, 'epoch': 3} {'type': 'loss', 'content': 0.0732324868440628, 'timestamp': '2025-09-10 03:01:08.301889', 'step': 19747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:08.355252', 'step': 19747, 'epoch': 3} {'type': 'loss', 'content': 0.029558897018432617, 'timestamp': '2025-09-10 03:01:08.361003', 'step': 19748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:08.414500', 'step': 19748, 'epoch': 3} {'type': 'loss', 'content': 0.06202996149659157, 'timestamp': '2025-09-10 03:01:08.416673', 'step': 19749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:08.470020', 'step': 19749, 'epoch': 3} {'type': 'loss', 'content': 0.07129602879285812, 'timestamp': '2025-09-10 03:01:08.472200', 'step': 19750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:08.524890', 'step': 19750, 'epoch': 3} {'type': 'loss', 'content': 0.10020112991333008, 'timestamp': '2025-09-10 03:01:08.526954', 'step': 19751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:08.580657', 'step': 19751, 'epoch': 3} {'type': 'loss', 'content': 0.09779173135757446, 'timestamp': '2025-09-10 03:01:08.586595', 'step': 19752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:08.639258', 'step': 19752, 'epoch': 3} {'type': 'loss', 'content': 0.03734731301665306, 'timestamp': '2025-09-10 03:01:08.641235', 'step': 19753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:08.694232', 'step': 19753, 'epoch': 3} {'type': 'loss', 'content': 0.08790595829486847, 'timestamp': '2025-09-10 03:01:08.696436', 'step': 19754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:08.751193', 'step': 19754, 'epoch': 3} {'type': 'loss', 'content': 0.038851380348205566, 'timestamp': '2025-09-10 03:01:08.753419', 'step': 19755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:08.807308', 'step': 19755, 'epoch': 3} {'type': 'loss', 'content': 0.10406049340963364, 'timestamp': '2025-09-10 03:01:08.813533', 'step': 19756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:08.866498', 'step': 19756, 'epoch': 3} {'type': 'loss', 'content': 0.028401918709278107, 'timestamp': '2025-09-10 03:01:08.868607', 'step': 19757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:08.921506', 'step': 19757, 'epoch': 3} {'type': 'loss', 'content': 0.05013422667980194, 'timestamp': '2025-09-10 03:01:08.923641', 'step': 19758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:08.977068', 'step': 19758, 'epoch': 3} {'type': 'loss', 'content': 0.1779228299856186, 'timestamp': '2025-09-10 03:01:08.980459', 'step': 19759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:09.034514', 'step': 19759, 'epoch': 3} {'type': 'loss', 'content': 0.04393267631530762, 'timestamp': '2025-09-10 03:01:09.040371', 'step': 19760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:09.093426', 'step': 19760, 'epoch': 3} {'type': 'loss', 'content': 0.0730138048529625, 'timestamp': '2025-09-10 03:01:09.095566', 'step': 19761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:09.150279', 'step': 19761, 'epoch': 3} {'type': 'loss', 'content': 0.06839233636856079, 'timestamp': '2025-09-10 03:01:09.152596', 'step': 19762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:09.205075', 'step': 19762, 'epoch': 3} {'type': 'loss', 'content': 0.14589746296405792, 'timestamp': '2025-09-10 03:01:09.207286', 'step': 19763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:09.260549', 'step': 19763, 'epoch': 3} {'type': 'loss', 'content': 0.05422235652804375, 'timestamp': '2025-09-10 03:01:09.266513', 'step': 19764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:09.320088', 'step': 19764, 'epoch': 3} {'type': 'loss', 'content': 0.06515246629714966, 'timestamp': '2025-09-10 03:01:09.322250', 'step': 19765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:09.375515', 'step': 19765, 'epoch': 3} {'type': 'loss', 'content': 0.06898721307516098, 'timestamp': '2025-09-10 03:01:09.377787', 'step': 19766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:09.431363', 'step': 19766, 'epoch': 3} {'type': 'loss', 'content': 0.11167670041322708, 'timestamp': '2025-09-10 03:01:09.433499', 'step': 19767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:09.486788', 'step': 19767, 'epoch': 3} {'type': 'loss', 'content': 0.09471997618675232, 'timestamp': '2025-09-10 03:01:09.492609', 'step': 19768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:09.545501', 'step': 19768, 'epoch': 3} {'type': 'loss', 'content': 0.11466680467128754, 'timestamp': '2025-09-10 03:01:09.547615', 'step': 19769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:09.601282', 'step': 19769, 'epoch': 3} {'type': 'loss', 'content': 0.08181708306074142, 'timestamp': '2025-09-10 03:01:09.603459', 'step': 19770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:09.658090', 'step': 19770, 'epoch': 3} {'type': 'loss', 'content': 0.08791930973529816, 'timestamp': '2025-09-10 03:01:09.660215', 'step': 19771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:09.713158', 'step': 19771, 'epoch': 3} {'type': 'loss', 'content': 0.08939249068498611, 'timestamp': '2025-09-10 03:01:09.718882', 'step': 19772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:09.771833', 'step': 19772, 'epoch': 3} {'type': 'loss', 'content': 0.03926726058125496, 'timestamp': '2025-09-10 03:01:09.773921', 'step': 19773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:09.827894', 'step': 19773, 'epoch': 3} {'type': 'loss', 'content': 0.0673074722290039, 'timestamp': '2025-09-10 03:01:09.829929', 'step': 19774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:09.883926', 'step': 19774, 'epoch': 3} {'type': 'loss', 'content': 0.08144493401050568, 'timestamp': '2025-09-10 03:01:09.885828', 'step': 19775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:09.939453', 'step': 19775, 'epoch': 3} {'type': 'loss', 'content': 0.09799323976039886, 'timestamp': '2025-09-10 03:01:09.945505', 'step': 19776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:09.998608', 'step': 19776, 'epoch': 3} {'type': 'loss', 'content': 0.04980453848838806, 'timestamp': '2025-09-10 03:01:10.000588', 'step': 19777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:10.054525', 'step': 19777, 'epoch': 3} {'type': 'loss', 'content': 0.14853766560554504, 'timestamp': '2025-09-10 03:01:10.056716', 'step': 19778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:10.109973', 'step': 19778, 'epoch': 3} {'type': 'loss', 'content': 0.10846640169620514, 'timestamp': '2025-09-10 03:01:10.112203', 'step': 19779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:10.166263', 'step': 19779, 'epoch': 3} {'type': 'loss', 'content': 0.06728087365627289, 'timestamp': '2025-09-10 03:01:10.172446', 'step': 19780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:10.225424', 'step': 19780, 'epoch': 3} {'type': 'loss', 'content': 0.08649768680334091, 'timestamp': '2025-09-10 03:01:10.227798', 'step': 19781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:10.281697', 'step': 19781, 'epoch': 3} {'type': 'loss', 'content': 0.0724363848567009, 'timestamp': '2025-09-10 03:01:10.283901', 'step': 19782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:10.338434', 'step': 19782, 'epoch': 3} {'type': 'loss', 'content': 0.1338375061750412, 'timestamp': '2025-09-10 03:01:10.341832', 'step': 19783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:10.398004', 'step': 19783, 'epoch': 3} {'type': 'loss', 'content': 0.035772912204265594, 'timestamp': '2025-09-10 03:01:10.404509', 'step': 19784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:10.458056', 'step': 19784, 'epoch': 3} {'type': 'loss', 'content': 0.052793148905038834, 'timestamp': '2025-09-10 03:01:10.460548', 'step': 19785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:01:10.514146', 'step': 19785, 'epoch': 3} {'type': 'loss', 'content': 0.1100008562207222, 'timestamp': '2025-09-10 03:01:10.516198', 'step': 19786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:10.570662', 'step': 19786, 'epoch': 3} {'type': 'loss', 'content': 0.10069692134857178, 'timestamp': '2025-09-10 03:01:10.572834', 'step': 19787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:10.625538', 'step': 19787, 'epoch': 3} {'type': 'loss', 'content': 0.11064086109399796, 'timestamp': '2025-09-10 03:01:10.631563', 'step': 19788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:10.684509', 'step': 19788, 'epoch': 3} {'type': 'loss', 'content': 0.2075434923171997, 'timestamp': '2025-09-10 03:01:10.686680', 'step': 19789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:10.740430', 'step': 19789, 'epoch': 3} {'type': 'loss', 'content': 0.10893108695745468, 'timestamp': '2025-09-10 03:01:10.742589', 'step': 19790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:10.796033', 'step': 19790, 'epoch': 3} {'type': 'loss', 'content': 0.049868393689394, 'timestamp': '2025-09-10 03:01:10.798216', 'step': 19791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:10.851685', 'step': 19791, 'epoch': 3} {'type': 'loss', 'content': 0.029309432953596115, 'timestamp': '2025-09-10 03:01:10.857781', 'step': 19792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:10.910897', 'step': 19792, 'epoch': 3} {'type': 'loss', 'content': 0.03780638054013252, 'timestamp': '2025-09-10 03:01:10.913081', 'step': 19793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:10.966545', 'step': 19793, 'epoch': 3} {'type': 'loss', 'content': 0.03753309324383736, 'timestamp': '2025-09-10 03:01:10.970580', 'step': 19794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:11.027479', 'step': 19794, 'epoch': 3} {'type': 'loss', 'content': 0.07587315887212753, 'timestamp': '2025-09-10 03:01:11.029803', 'step': 19795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:11.083788', 'step': 19795, 'epoch': 3} {'type': 'loss', 'content': 0.07512634247541428, 'timestamp': '2025-09-10 03:01:11.089839', 'step': 19796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:11.142362', 'step': 19796, 'epoch': 3} {'type': 'loss', 'content': 0.08938752114772797, 'timestamp': '2025-09-10 03:01:11.144860', 'step': 19797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:11.201069', 'step': 19797, 'epoch': 3} {'type': 'loss', 'content': 0.061978381127119064, 'timestamp': '2025-09-10 03:01:11.203290', 'step': 19798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:11.263667', 'step': 19798, 'epoch': 3} {'type': 'loss', 'content': 0.12075518816709518, 'timestamp': '2025-09-10 03:01:11.265914', 'step': 19799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:11.319995', 'step': 19799, 'epoch': 3} {'type': 'loss', 'content': 0.12430822849273682, 'timestamp': '2025-09-10 03:01:11.326751', 'step': 19800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:11.382105', 'step': 19800, 'epoch': 3} {'type': 'loss', 'content': 0.14848971366882324, 'timestamp': '2025-09-10 03:01:11.384276', 'step': 19801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:11.438008', 'step': 19801, 'epoch': 3} {'type': 'loss', 'content': 0.06127513572573662, 'timestamp': '2025-09-10 03:01:11.440300', 'step': 19802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:11.495320', 'step': 19802, 'epoch': 3} {'type': 'loss', 'content': 0.03784023970365524, 'timestamp': '2025-09-10 03:01:11.497685', 'step': 19803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:11.551431', 'step': 19803, 'epoch': 3} {'type': 'loss', 'content': 0.06776829808950424, 'timestamp': '2025-09-10 03:01:11.561082', 'step': 19804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:11.616703', 'step': 19804, 'epoch': 3} {'type': 'loss', 'content': 0.05129224434494972, 'timestamp': '2025-09-10 03:01:11.618825', 'step': 19805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:11.672243', 'step': 19805, 'epoch': 3} {'type': 'loss', 'content': 0.1428069919347763, 'timestamp': '2025-09-10 03:01:11.674340', 'step': 19806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:11.727593', 'step': 19806, 'epoch': 3} {'type': 'loss', 'content': 0.0962071493268013, 'timestamp': '2025-09-10 03:01:11.729649', 'step': 19807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:11.783796', 'step': 19807, 'epoch': 3} {'type': 'loss', 'content': 0.08194120973348618, 'timestamp': '2025-09-10 03:01:11.789870', 'step': 19808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:11.847914', 'step': 19808, 'epoch': 3} {'type': 'loss', 'content': 0.11213697493076324, 'timestamp': '2025-09-10 03:01:11.850147', 'step': 19809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:11.903835', 'step': 19809, 'epoch': 3} {'type': 'loss', 'content': 0.06624963879585266, 'timestamp': '2025-09-10 03:01:11.906131', 'step': 19810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:11.961375', 'step': 19810, 'epoch': 3} {'type': 'loss', 'content': 0.14313967525959015, 'timestamp': '2025-09-10 03:01:11.963462', 'step': 19811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:12.017525', 'step': 19811, 'epoch': 3} {'type': 'loss', 'content': 0.05997234210371971, 'timestamp': '2025-09-10 03:01:12.023601', 'step': 19812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:12.076333', 'step': 19812, 'epoch': 3} {'type': 'loss', 'content': 0.15648455917835236, 'timestamp': '2025-09-10 03:01:12.078514', 'step': 19813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:12.131753', 'step': 19813, 'epoch': 3} {'type': 'loss', 'content': 0.15118220448493958, 'timestamp': '2025-09-10 03:01:12.133971', 'step': 19814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:12.188754', 'step': 19814, 'epoch': 3} {'type': 'loss', 'content': 0.12034225463867188, 'timestamp': '2025-09-10 03:01:12.190862', 'step': 19815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:12.244623', 'step': 19815, 'epoch': 3} {'type': 'loss', 'content': 0.043500181287527084, 'timestamp': '2025-09-10 03:01:12.250549', 'step': 19816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:12.303330', 'step': 19816, 'epoch': 3} {'type': 'loss', 'content': 0.08082374930381775, 'timestamp': '2025-09-10 03:01:12.305539', 'step': 19817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:12.359480', 'step': 19817, 'epoch': 3} {'type': 'loss', 'content': 0.04089617729187012, 'timestamp': '2025-09-10 03:01:12.361747', 'step': 19818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:12.415273', 'step': 19818, 'epoch': 3} {'type': 'loss', 'content': 0.11319993436336517, 'timestamp': '2025-09-10 03:01:12.417529', 'step': 19819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:12.471463', 'step': 19819, 'epoch': 3} {'type': 'loss', 'content': 0.12271825969219208, 'timestamp': '2025-09-10 03:01:12.477429', 'step': 19820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:12.531770', 'step': 19820, 'epoch': 3} {'type': 'loss', 'content': 0.05515533685684204, 'timestamp': '2025-09-10 03:01:12.534073', 'step': 19821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:12.588458', 'step': 19821, 'epoch': 3} {'type': 'loss', 'content': 0.08951635658740997, 'timestamp': '2025-09-10 03:01:12.590633', 'step': 19822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:12.644829', 'step': 19822, 'epoch': 3} {'type': 'loss', 'content': 0.047473177313804626, 'timestamp': '2025-09-10 03:01:12.647226', 'step': 19823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:12.701011', 'step': 19823, 'epoch': 3} {'type': 'loss', 'content': 0.02520802430808544, 'timestamp': '2025-09-10 03:01:12.707349', 'step': 19824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:12.760403', 'step': 19824, 'epoch': 3} {'type': 'loss', 'content': 0.06782467663288116, 'timestamp': '2025-09-10 03:01:12.762588', 'step': 19825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:12.815674', 'step': 19825, 'epoch': 3} {'type': 'loss', 'content': 0.06763138622045517, 'timestamp': '2025-09-10 03:01:12.817835', 'step': 19826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:12.873096', 'step': 19826, 'epoch': 3} {'type': 'loss', 'content': 0.1321277618408203, 'timestamp': '2025-09-10 03:01:12.875256', 'step': 19827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:12.929232', 'step': 19827, 'epoch': 3} {'type': 'loss', 'content': 0.10898204892873764, 'timestamp': '2025-09-10 03:01:12.935163', 'step': 19828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:12.988190', 'step': 19828, 'epoch': 3} {'type': 'loss', 'content': 0.06116363778710365, 'timestamp': '2025-09-10 03:01:12.990327', 'step': 19829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:13.043469', 'step': 19829, 'epoch': 3} {'type': 'loss', 'content': 0.08802905678749084, 'timestamp': '2025-09-10 03:01:13.045745', 'step': 19830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:13.102027', 'step': 19830, 'epoch': 3} {'type': 'loss', 'content': 0.13533835113048553, 'timestamp': '2025-09-10 03:01:13.104011', 'step': 19831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:13.157567', 'step': 19831, 'epoch': 3} {'type': 'loss', 'content': 0.10965341329574585, 'timestamp': '2025-09-10 03:01:13.163619', 'step': 19832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:13.216637', 'step': 19832, 'epoch': 3} {'type': 'loss', 'content': 0.056026749312877655, 'timestamp': '2025-09-10 03:01:13.218731', 'step': 19833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:13.272194', 'step': 19833, 'epoch': 3} {'type': 'loss', 'content': 0.18443474173545837, 'timestamp': '2025-09-10 03:01:13.274453', 'step': 19834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:13.327558', 'step': 19834, 'epoch': 3} {'type': 'loss', 'content': 0.07588794827461243, 'timestamp': '2025-09-10 03:01:13.329525', 'step': 19835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:13.383233', 'step': 19835, 'epoch': 3} {'type': 'loss', 'content': 0.08927077800035477, 'timestamp': '2025-09-10 03:01:13.389030', 'step': 19836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:13.441584', 'step': 19836, 'epoch': 3} {'type': 'loss', 'content': 0.13196934759616852, 'timestamp': '2025-09-10 03:01:13.444095', 'step': 19837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:13.497771', 'step': 19837, 'epoch': 3} {'type': 'loss', 'content': 0.0758775919675827, 'timestamp': '2025-09-10 03:01:13.500125', 'step': 19838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:13.553781', 'step': 19838, 'epoch': 3} {'type': 'loss', 'content': 0.05437835305929184, 'timestamp': '2025-09-10 03:01:13.556097', 'step': 19839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:13.611728', 'step': 19839, 'epoch': 3} {'type': 'loss', 'content': 0.11220259219408035, 'timestamp': '2025-09-10 03:01:13.617805', 'step': 19840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:13.670884', 'step': 19840, 'epoch': 3} {'type': 'loss', 'content': 0.07279974222183228, 'timestamp': '2025-09-10 03:01:13.672946', 'step': 19841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:13.726365', 'step': 19841, 'epoch': 3} {'type': 'loss', 'content': 0.08411121368408203, 'timestamp': '2025-09-10 03:01:13.728463', 'step': 19842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:13.782050', 'step': 19842, 'epoch': 3} {'type': 'loss', 'content': 0.1152404174208641, 'timestamp': '2025-09-10 03:01:13.784160', 'step': 19843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:13.837639', 'step': 19843, 'epoch': 3} {'type': 'loss', 'content': 0.06668249517679214, 'timestamp': '2025-09-10 03:01:13.843724', 'step': 19844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:13.896526', 'step': 19844, 'epoch': 3} {'type': 'loss', 'content': 0.13105858862400055, 'timestamp': '2025-09-10 03:01:13.898704', 'step': 19845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:13.951994', 'step': 19845, 'epoch': 3} {'type': 'loss', 'content': 0.08964245766401291, 'timestamp': '2025-09-10 03:01:13.954216', 'step': 19846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:14.007860', 'step': 19846, 'epoch': 3} {'type': 'loss', 'content': 0.20057713985443115, 'timestamp': '2025-09-10 03:01:14.010097', 'step': 19847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:14.062905', 'step': 19847, 'epoch': 3} {'type': 'loss', 'content': 0.10375342518091202, 'timestamp': '2025-09-10 03:01:14.068567', 'step': 19848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:14.122178', 'step': 19848, 'epoch': 3} {'type': 'loss', 'content': 0.08332552760839462, 'timestamp': '2025-09-10 03:01:14.124152', 'step': 19849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:14.177736', 'step': 19849, 'epoch': 3} {'type': 'loss', 'content': 0.1062416061758995, 'timestamp': '2025-09-10 03:01:14.179793', 'step': 19850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:14.232827', 'step': 19850, 'epoch': 3} {'type': 'loss', 'content': 0.07789915800094604, 'timestamp': '2025-09-10 03:01:14.235179', 'step': 19851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:14.288432', 'step': 19851, 'epoch': 3} {'type': 'loss', 'content': 0.06545405834913254, 'timestamp': '2025-09-10 03:01:14.294585', 'step': 19852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:14.347238', 'step': 19852, 'epoch': 3} {'type': 'loss', 'content': 0.031274549663066864, 'timestamp': '2025-09-10 03:01:14.349625', 'step': 19853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:14.402566', 'step': 19853, 'epoch': 3} {'type': 'loss', 'content': 0.11994055658578873, 'timestamp': '2025-09-10 03:01:14.404728', 'step': 19854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:14.458392', 'step': 19854, 'epoch': 3} {'type': 'loss', 'content': 0.08129812031984329, 'timestamp': '2025-09-10 03:01:14.460664', 'step': 19855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:14.514538', 'step': 19855, 'epoch': 3} {'type': 'loss', 'content': 0.11973083019256592, 'timestamp': '2025-09-10 03:01:14.520829', 'step': 19856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:14.575141', 'step': 19856, 'epoch': 3} {'type': 'loss', 'content': 0.1028551235795021, 'timestamp': '2025-09-10 03:01:14.577886', 'step': 19857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:14.632330', 'step': 19857, 'epoch': 3} {'type': 'loss', 'content': 0.13078761100769043, 'timestamp': '2025-09-10 03:01:14.634379', 'step': 19858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:14.689635', 'step': 19858, 'epoch': 3} {'type': 'loss', 'content': 0.09272602945566177, 'timestamp': '2025-09-10 03:01:14.691768', 'step': 19859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:14.746071', 'step': 19859, 'epoch': 3} {'type': 'loss', 'content': 0.051224395632743835, 'timestamp': '2025-09-10 03:01:14.752188', 'step': 19860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:14.805437', 'step': 19860, 'epoch': 3} {'type': 'loss', 'content': 0.09275980293750763, 'timestamp': '2025-09-10 03:01:14.807547', 'step': 19861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:14.861263', 'step': 19861, 'epoch': 3} {'type': 'loss', 'content': 0.113491490483284, 'timestamp': '2025-09-10 03:01:14.863476', 'step': 19862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:14.917125', 'step': 19862, 'epoch': 3} {'type': 'loss', 'content': 0.05503036081790924, 'timestamp': '2025-09-10 03:01:14.919155', 'step': 19863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:14.972158', 'step': 19863, 'epoch': 3} {'type': 'loss', 'content': 0.21248149871826172, 'timestamp': '2025-09-10 03:01:14.978026', 'step': 19864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:15.031293', 'step': 19864, 'epoch': 3} {'type': 'loss', 'content': 0.10946016013622284, 'timestamp': '2025-09-10 03:01:15.033506', 'step': 19865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:01:15.087182', 'step': 19865, 'epoch': 3} {'type': 'loss', 'content': 0.15369847416877747, 'timestamp': '2025-09-10 03:01:15.089486', 'step': 19866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:15.143547', 'step': 19866, 'epoch': 3} {'type': 'loss', 'content': 0.17076705396175385, 'timestamp': '2025-09-10 03:01:15.145830', 'step': 19867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:15.199852', 'step': 19867, 'epoch': 3} {'type': 'loss', 'content': 0.05502762272953987, 'timestamp': '2025-09-10 03:01:15.205919', 'step': 19868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:15.260208', 'step': 19868, 'epoch': 3} {'type': 'loss', 'content': 0.09348594397306442, 'timestamp': '2025-09-10 03:01:15.262510', 'step': 19869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:15.315673', 'step': 19869, 'epoch': 3} {'type': 'loss', 'content': 0.03850523382425308, 'timestamp': '2025-09-10 03:01:15.318030', 'step': 19870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:15.372733', 'step': 19870, 'epoch': 3} {'type': 'loss', 'content': 0.043617840856313705, 'timestamp': '2025-09-10 03:01:15.374907', 'step': 19871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:15.430908', 'step': 19871, 'epoch': 3} {'type': 'loss', 'content': 0.10010399669408798, 'timestamp': '2025-09-10 03:01:15.437069', 'step': 19872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:15.490676', 'step': 19872, 'epoch': 3} {'type': 'loss', 'content': 0.12496436387300491, 'timestamp': '2025-09-10 03:01:15.492990', 'step': 19873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:15.547418', 'step': 19873, 'epoch': 3} {'type': 'loss', 'content': 0.07946739345788956, 'timestamp': '2025-09-10 03:01:15.549595', 'step': 19874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:15.603352', 'step': 19874, 'epoch': 3} {'type': 'loss', 'content': 0.1094636619091034, 'timestamp': '2025-09-10 03:01:15.605642', 'step': 19875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:15.658957', 'step': 19875, 'epoch': 3} {'type': 'loss', 'content': 0.13054852187633514, 'timestamp': '2025-09-10 03:01:15.664923', 'step': 19876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:01:15.718233', 'step': 19876, 'epoch': 3} {'type': 'loss', 'content': 0.07433024048805237, 'timestamp': '2025-09-10 03:01:15.720437', 'step': 19877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:15.775297', 'step': 19877, 'epoch': 3} {'type': 'loss', 'content': 0.058608733117580414, 'timestamp': '2025-09-10 03:01:15.777545', 'step': 19878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:15.831324', 'step': 19878, 'epoch': 3} {'type': 'loss', 'content': 0.12694282829761505, 'timestamp': '2025-09-10 03:01:15.833564', 'step': 19879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:15.887091', 'step': 19879, 'epoch': 3} {'type': 'loss', 'content': 0.10873710364103317, 'timestamp': '2025-09-10 03:01:15.893425', 'step': 19880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:15.946523', 'step': 19880, 'epoch': 3} {'type': 'loss', 'content': 0.07457438111305237, 'timestamp': '2025-09-10 03:01:15.948845', 'step': 19881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:16.002616', 'step': 19881, 'epoch': 3} {'type': 'loss', 'content': 0.13973815739154816, 'timestamp': '2025-09-10 03:01:16.004774', 'step': 19882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:16.060228', 'step': 19882, 'epoch': 3} {'type': 'loss', 'content': 0.04475128650665283, 'timestamp': '2025-09-10 03:01:16.062431', 'step': 19883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:16.115926', 'step': 19883, 'epoch': 3} {'type': 'loss', 'content': 0.07736799865961075, 'timestamp': '2025-09-10 03:01:16.122126', 'step': 19884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:16.174741', 'step': 19884, 'epoch': 3} {'type': 'loss', 'content': 0.07591135799884796, 'timestamp': '2025-09-10 03:01:16.176954', 'step': 19885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:16.233499', 'step': 19885, 'epoch': 3} {'type': 'loss', 'content': 0.11708430200815201, 'timestamp': '2025-09-10 03:01:16.235669', 'step': 19886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:16.290478', 'step': 19886, 'epoch': 3} {'type': 'loss', 'content': 0.07157397270202637, 'timestamp': '2025-09-10 03:01:16.292636', 'step': 19887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:16.347643', 'step': 19887, 'epoch': 3} {'type': 'loss', 'content': 0.049950920045375824, 'timestamp': '2025-09-10 03:01:16.353815', 'step': 19888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:16.407763', 'step': 19888, 'epoch': 3} {'type': 'loss', 'content': 0.167755126953125, 'timestamp': '2025-09-10 03:01:16.409928', 'step': 19889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:16.463407', 'step': 19889, 'epoch': 3} {'type': 'loss', 'content': 0.10270023345947266, 'timestamp': '2025-09-10 03:01:16.465604', 'step': 19890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:16.519278', 'step': 19890, 'epoch': 3} {'type': 'loss', 'content': 0.05792749300599098, 'timestamp': '2025-09-10 03:01:16.521740', 'step': 19891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:16.575652', 'step': 19891, 'epoch': 3} {'type': 'loss', 'content': 0.010672246105968952, 'timestamp': '2025-09-10 03:01:16.581666', 'step': 19892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:16.634778', 'step': 19892, 'epoch': 3} {'type': 'loss', 'content': 0.1017884835600853, 'timestamp': '2025-09-10 03:01:16.636929', 'step': 19893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:16.690787', 'step': 19893, 'epoch': 3} {'type': 'loss', 'content': 0.12076690793037415, 'timestamp': '2025-09-10 03:01:16.693063', 'step': 19894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:16.747291', 'step': 19894, 'epoch': 3} {'type': 'loss', 'content': 0.040549829602241516, 'timestamp': '2025-09-10 03:01:16.749479', 'step': 19895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:16.804424', 'step': 19895, 'epoch': 3} {'type': 'loss', 'content': 0.09490247815847397, 'timestamp': '2025-09-10 03:01:16.811015', 'step': 19896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:16.864404', 'step': 19896, 'epoch': 3} {'type': 'loss', 'content': 0.10126214474439621, 'timestamp': '2025-09-10 03:01:16.866479', 'step': 19897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:01:16.920023', 'step': 19897, 'epoch': 3} {'type': 'loss', 'content': 0.06151978671550751, 'timestamp': '2025-09-10 03:01:16.922355', 'step': 19898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:16.976008', 'step': 19898, 'epoch': 3} {'type': 'loss', 'content': 0.15404438972473145, 'timestamp': '2025-09-10 03:01:16.978093', 'step': 19899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:17.030919', 'step': 19899, 'epoch': 3} {'type': 'loss', 'content': 0.08141720294952393, 'timestamp': '2025-09-10 03:01:17.036981', 'step': 19900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:17.088844', 'step': 19900, 'epoch': 3} {'type': 'loss', 'content': 0.06869889050722122, 'timestamp': '2025-09-10 03:01:17.090964', 'step': 19901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:01:17.144059', 'step': 19901, 'epoch': 3} {'type': 'loss', 'content': 0.07465032488107681, 'timestamp': '2025-09-10 03:01:17.146085', 'step': 19902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:17.199510', 'step': 19902, 'epoch': 3} {'type': 'loss', 'content': 0.059743646532297134, 'timestamp': '2025-09-10 03:01:17.201649', 'step': 19903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:17.255914', 'step': 19903, 'epoch': 3} {'type': 'loss', 'content': 0.11728011816740036, 'timestamp': '2025-09-10 03:01:17.261571', 'step': 19904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:17.314343', 'step': 19904, 'epoch': 3} {'type': 'loss', 'content': 0.12462100386619568, 'timestamp': '2025-09-10 03:01:17.316566', 'step': 19905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:17.371732', 'step': 19905, 'epoch': 3} {'type': 'loss', 'content': 0.10001659393310547, 'timestamp': '2025-09-10 03:01:17.373862', 'step': 19906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:17.427105', 'step': 19906, 'epoch': 3} {'type': 'loss', 'content': 0.09798279404640198, 'timestamp': '2025-09-10 03:01:17.429234', 'step': 19907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:17.483105', 'step': 19907, 'epoch': 3} {'type': 'loss', 'content': 0.09396017342805862, 'timestamp': '2025-09-10 03:01:17.489112', 'step': 19908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:17.542154', 'step': 19908, 'epoch': 3} {'type': 'loss', 'content': 0.070159912109375, 'timestamp': '2025-09-10 03:01:17.544517', 'step': 19909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:17.597783', 'step': 19909, 'epoch': 3} {'type': 'loss', 'content': 0.07453522086143494, 'timestamp': '2025-09-10 03:01:17.600098', 'step': 19910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:17.653269', 'step': 19910, 'epoch': 3} {'type': 'loss', 'content': 0.15641196072101593, 'timestamp': '2025-09-10 03:01:17.655406', 'step': 19911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:17.708216', 'step': 19911, 'epoch': 3} {'type': 'loss', 'content': 0.12219148129224777, 'timestamp': '2025-09-10 03:01:17.714199', 'step': 19912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:17.767202', 'step': 19912, 'epoch': 3} {'type': 'loss', 'content': 0.058701127767562866, 'timestamp': '2025-09-10 03:01:17.769470', 'step': 19913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:17.822636', 'step': 19913, 'epoch': 3} {'type': 'loss', 'content': 0.18930059671401978, 'timestamp': '2025-09-10 03:01:17.824579', 'step': 19914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:17.879442', 'step': 19914, 'epoch': 3} {'type': 'loss', 'content': 0.09729275107383728, 'timestamp': '2025-09-10 03:01:17.881428', 'step': 19915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:17.934658', 'step': 19915, 'epoch': 3} {'type': 'loss', 'content': 0.08296200633049011, 'timestamp': '2025-09-10 03:01:17.940845', 'step': 19916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:17.993611', 'step': 19916, 'epoch': 3} {'type': 'loss', 'content': 0.089945949614048, 'timestamp': '2025-09-10 03:01:17.995574', 'step': 19917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:18.050757', 'step': 19917, 'epoch': 3} {'type': 'loss', 'content': 0.0661112517118454, 'timestamp': '2025-09-10 03:01:18.053024', 'step': 19918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:18.108538', 'step': 19918, 'epoch': 3} {'type': 'loss', 'content': 0.14540933072566986, 'timestamp': '2025-09-10 03:01:18.110802', 'step': 19919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 03:01:18.165703', 'step': 19919, 'epoch': 3} {'type': 'loss', 'content': 0.06242747977375984, 'timestamp': '2025-09-10 03:01:18.171763', 'step': 19920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:18.223718', 'step': 19920, 'epoch': 3} {'type': 'loss', 'content': 0.08185826987028122, 'timestamp': '2025-09-10 03:01:18.225787', 'step': 19921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:18.278514', 'step': 19921, 'epoch': 3} {'type': 'loss', 'content': 0.09175600856542587, 'timestamp': '2025-09-10 03:01:18.280509', 'step': 19922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:18.333563', 'step': 19922, 'epoch': 3} {'type': 'loss', 'content': 0.13332436978816986, 'timestamp': '2025-09-10 03:01:18.335901', 'step': 19923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:18.389710', 'step': 19923, 'epoch': 3} {'type': 'loss', 'content': 0.0761367529630661, 'timestamp': '2025-09-10 03:01:18.396019', 'step': 19924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:18.450269', 'step': 19924, 'epoch': 3} {'type': 'loss', 'content': 0.03905295208096504, 'timestamp': '2025-09-10 03:01:18.452462', 'step': 19925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:18.505486', 'step': 19925, 'epoch': 3} {'type': 'loss', 'content': 0.05341146141290665, 'timestamp': '2025-09-10 03:01:18.507589', 'step': 19926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:18.560673', 'step': 19926, 'epoch': 3} {'type': 'loss', 'content': 0.069009929895401, 'timestamp': '2025-09-10 03:01:18.564399', 'step': 19927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:18.619153', 'step': 19927, 'epoch': 3} {'type': 'loss', 'content': 0.08632459491491318, 'timestamp': '2025-09-10 03:01:18.625065', 'step': 19928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:18.677960', 'step': 19928, 'epoch': 3} {'type': 'loss', 'content': 0.1289130449295044, 'timestamp': '2025-09-10 03:01:18.680193', 'step': 19929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:18.732652', 'step': 19929, 'epoch': 3} {'type': 'loss', 'content': 0.07404951751232147, 'timestamp': '2025-09-10 03:01:18.734758', 'step': 19930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:18.787542', 'step': 19930, 'epoch': 3} {'type': 'loss', 'content': 0.1261671483516693, 'timestamp': '2025-09-10 03:01:18.789648', 'step': 19931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:18.843226', 'step': 19931, 'epoch': 3} {'type': 'loss', 'content': 0.05866173282265663, 'timestamp': '2025-09-10 03:01:18.848993', 'step': 19932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:18.901552', 'step': 19932, 'epoch': 3} {'type': 'loss', 'content': 0.10936091840267181, 'timestamp': '2025-09-10 03:01:18.903510', 'step': 19933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:18.957222', 'step': 19933, 'epoch': 3} {'type': 'loss', 'content': 0.09367262572050095, 'timestamp': '2025-09-10 03:01:18.959309', 'step': 19934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:19.012442', 'step': 19934, 'epoch': 3} {'type': 'loss', 'content': 0.04636091738939285, 'timestamp': '2025-09-10 03:01:19.014504', 'step': 19935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:19.067885', 'step': 19935, 'epoch': 3} {'type': 'loss', 'content': 0.08172328770160675, 'timestamp': '2025-09-10 03:01:19.073764', 'step': 19936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:19.126656', 'step': 19936, 'epoch': 3} {'type': 'loss', 'content': 0.11948265880346298, 'timestamp': '2025-09-10 03:01:19.128890', 'step': 19937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:19.181759', 'step': 19937, 'epoch': 3} {'type': 'loss', 'content': 0.13530656695365906, 'timestamp': '2025-09-10 03:01:19.184067', 'step': 19938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:19.237315', 'step': 19938, 'epoch': 3} {'type': 'loss', 'content': 0.10303983837366104, 'timestamp': '2025-09-10 03:01:19.239566', 'step': 19939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:19.292399', 'step': 19939, 'epoch': 3} {'type': 'loss', 'content': 0.10477599501609802, 'timestamp': '2025-09-10 03:01:19.298131', 'step': 19940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:19.350605', 'step': 19940, 'epoch': 3} {'type': 'loss', 'content': 0.03194532170891762, 'timestamp': '2025-09-10 03:01:19.352718', 'step': 19941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:19.405781', 'step': 19941, 'epoch': 3} {'type': 'loss', 'content': 0.08330478519201279, 'timestamp': '2025-09-10 03:01:19.409261', 'step': 19942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:19.464894', 'step': 19942, 'epoch': 3} {'type': 'loss', 'content': 0.071216881275177, 'timestamp': '2025-09-10 03:01:19.467047', 'step': 19943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:19.521767', 'step': 19943, 'epoch': 3} {'type': 'loss', 'content': 0.11722570657730103, 'timestamp': '2025-09-10 03:01:19.527571', 'step': 19944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:19.582253', 'step': 19944, 'epoch': 3} {'type': 'loss', 'content': 0.10895304381847382, 'timestamp': '2025-09-10 03:01:19.584513', 'step': 19945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:01:19.638461', 'step': 19945, 'epoch': 3} {'type': 'loss', 'content': 0.09821060299873352, 'timestamp': '2025-09-10 03:01:19.640706', 'step': 19946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:19.697236', 'step': 19946, 'epoch': 3} {'type': 'loss', 'content': 0.14157645404338837, 'timestamp': '2025-09-10 03:01:19.699430', 'step': 19947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:19.757671', 'step': 19947, 'epoch': 3} {'type': 'loss', 'content': 0.08733374625444412, 'timestamp': '2025-09-10 03:01:19.764132', 'step': 19948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:19.818791', 'step': 19948, 'epoch': 3} {'type': 'loss', 'content': 0.11716167628765106, 'timestamp': '2025-09-10 03:01:19.820998', 'step': 19949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:19.876531', 'step': 19949, 'epoch': 3} {'type': 'loss', 'content': 0.07749728113412857, 'timestamp': '2025-09-10 03:01:19.878491', 'step': 19950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:19.932226', 'step': 19950, 'epoch': 3} {'type': 'loss', 'content': 0.10837872326374054, 'timestamp': '2025-09-10 03:01:19.934483', 'step': 19951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:19.989360', 'step': 19951, 'epoch': 3} {'type': 'loss', 'content': 0.09842108190059662, 'timestamp': '2025-09-10 03:01:19.995585', 'step': 19952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:20.049222', 'step': 19952, 'epoch': 3} {'type': 'loss', 'content': 0.11013344675302505, 'timestamp': '2025-09-10 03:01:20.051395', 'step': 19953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:20.104677', 'step': 19953, 'epoch': 3} {'type': 'loss', 'content': 0.09448494762182236, 'timestamp': '2025-09-10 03:01:20.106994', 'step': 19954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:20.160523', 'step': 19954, 'epoch': 3} {'type': 'loss', 'content': 0.08221124112606049, 'timestamp': '2025-09-10 03:01:20.162776', 'step': 19955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:20.215679', 'step': 19955, 'epoch': 3} {'type': 'loss', 'content': 0.13625499606132507, 'timestamp': '2025-09-10 03:01:20.221796', 'step': 19956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:20.275110', 'step': 19956, 'epoch': 3} {'type': 'loss', 'content': 0.08676360547542572, 'timestamp': '2025-09-10 03:01:20.277301', 'step': 19957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:20.330057', 'step': 19957, 'epoch': 3} {'type': 'loss', 'content': 0.09564124792814255, 'timestamp': '2025-09-10 03:01:20.332268', 'step': 19958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:20.385650', 'step': 19958, 'epoch': 3} {'type': 'loss', 'content': 0.11644355207681656, 'timestamp': '2025-09-10 03:01:20.387634', 'step': 19959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:20.441426', 'step': 19959, 'epoch': 3} {'type': 'loss', 'content': 0.11441265791654587, 'timestamp': '2025-09-10 03:01:20.447496', 'step': 19960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:20.504131', 'step': 19960, 'epoch': 3} {'type': 'loss', 'content': 0.06275131553411484, 'timestamp': '2025-09-10 03:01:20.506260', 'step': 19961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:20.564225', 'step': 19961, 'epoch': 3} {'type': 'loss', 'content': 0.13498148322105408, 'timestamp': '2025-09-10 03:01:20.566597', 'step': 19962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:20.627980', 'step': 19962, 'epoch': 3} {'type': 'loss', 'content': 0.09972067922353745, 'timestamp': '2025-09-10 03:01:20.630467', 'step': 19963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:20.684412', 'step': 19963, 'epoch': 3} {'type': 'loss', 'content': 0.09230587631464005, 'timestamp': '2025-09-10 03:01:20.696244', 'step': 19964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:20.752187', 'step': 19964, 'epoch': 3} {'type': 'loss', 'content': 0.09073273837566376, 'timestamp': '2025-09-10 03:01:20.754375', 'step': 19965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:01:20.807698', 'step': 19965, 'epoch': 3} {'type': 'loss', 'content': 0.08642977476119995, 'timestamp': '2025-09-10 03:01:20.810017', 'step': 19966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:20.863479', 'step': 19966, 'epoch': 3} {'type': 'loss', 'content': 0.17603740096092224, 'timestamp': '2025-09-10 03:01:20.865859', 'step': 19967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:20.927363', 'step': 19967, 'epoch': 3} {'type': 'loss', 'content': 0.06686320900917053, 'timestamp': '2025-09-10 03:01:20.936958', 'step': 19968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:01:20.991838', 'step': 19968, 'epoch': 3} {'type': 'loss', 'content': 0.0772646814584732, 'timestamp': '2025-09-10 03:01:20.993963', 'step': 19969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:21.049675', 'step': 19969, 'epoch': 3} {'type': 'loss', 'content': 0.08446016907691956, 'timestamp': '2025-09-10 03:01:21.052520', 'step': 19970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:21.114619', 'step': 19970, 'epoch': 3} {'type': 'loss', 'content': 0.11192039400339127, 'timestamp': '2025-09-10 03:01:21.119397', 'step': 19971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:21.173481', 'step': 19971, 'epoch': 3} {'type': 'loss', 'content': 0.12349896132946014, 'timestamp': '2025-09-10 03:01:21.185224', 'step': 19972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:21.237946', 'step': 19972, 'epoch': 3} {'type': 'loss', 'content': 0.11094874888658524, 'timestamp': '2025-09-10 03:01:21.240181', 'step': 19973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:21.294615', 'step': 19973, 'epoch': 3} {'type': 'loss', 'content': 0.05314977094531059, 'timestamp': '2025-09-10 03:01:21.298199', 'step': 19974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:21.356362', 'step': 19974, 'epoch': 3} {'type': 'loss', 'content': 0.1046384945511818, 'timestamp': '2025-09-10 03:01:21.358494', 'step': 19975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:21.411919', 'step': 19975, 'epoch': 3} {'type': 'loss', 'content': 0.04473905265331268, 'timestamp': '2025-09-10 03:01:21.417784', 'step': 19976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:21.470694', 'step': 19976, 'epoch': 3} {'type': 'loss', 'content': 0.16636832058429718, 'timestamp': '2025-09-10 03:01:21.473061', 'step': 19977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:21.526641', 'step': 19977, 'epoch': 3} {'type': 'loss', 'content': 0.026678455993533134, 'timestamp': '2025-09-10 03:01:21.528922', 'step': 19978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:01:21.584055', 'step': 19978, 'epoch': 3} {'type': 'loss', 'content': 0.07697435468435287, 'timestamp': '2025-09-10 03:01:21.586297', 'step': 19979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:21.640380', 'step': 19979, 'epoch': 3} {'type': 'loss', 'content': 0.06022194027900696, 'timestamp': '2025-09-10 03:01:21.646310', 'step': 19980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:21.698688', 'step': 19980, 'epoch': 3} {'type': 'loss', 'content': 0.05416133627295494, 'timestamp': '2025-09-10 03:01:21.700704', 'step': 19981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:21.754208', 'step': 19981, 'epoch': 3} {'type': 'loss', 'content': 0.04888465628027916, 'timestamp': '2025-09-10 03:01:21.756511', 'step': 19982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:21.810804', 'step': 19982, 'epoch': 3} {'type': 'loss', 'content': 0.04525095596909523, 'timestamp': '2025-09-10 03:01:21.812958', 'step': 19983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:21.866222', 'step': 19983, 'epoch': 3} {'type': 'loss', 'content': 0.16488046944141388, 'timestamp': '2025-09-10 03:01:21.872061', 'step': 19984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:21.925128', 'step': 19984, 'epoch': 3} {'type': 'loss', 'content': 0.088704414665699, 'timestamp': '2025-09-10 03:01:21.927246', 'step': 19985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:21.980336', 'step': 19985, 'epoch': 3} {'type': 'loss', 'content': 0.14755935966968536, 'timestamp': '2025-09-10 03:01:21.982468', 'step': 19986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:22.035299', 'step': 19986, 'epoch': 3} {'type': 'loss', 'content': 0.08724033087491989, 'timestamp': '2025-09-10 03:01:22.037453', 'step': 19987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:22.090476', 'step': 19987, 'epoch': 3} {'type': 'loss', 'content': 0.029302753508090973, 'timestamp': '2025-09-10 03:01:22.096144', 'step': 19988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:01:22.149668', 'step': 19988, 'epoch': 3} {'type': 'loss', 'content': 0.10587673634290695, 'timestamp': '2025-09-10 03:01:22.151825', 'step': 19989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:22.205602', 'step': 19989, 'epoch': 3} {'type': 'loss', 'content': 0.15126247704029083, 'timestamp': '2025-09-10 03:01:22.207914', 'step': 19990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:22.261740', 'step': 19990, 'epoch': 3} {'type': 'loss', 'content': 0.09750943630933762, 'timestamp': '2025-09-10 03:01:22.263680', 'step': 19991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:22.317933', 'step': 19991, 'epoch': 3} {'type': 'loss', 'content': 0.019693586975336075, 'timestamp': '2025-09-10 03:01:22.323804', 'step': 19992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:22.376362', 'step': 19992, 'epoch': 3} {'type': 'loss', 'content': 0.05067054554820061, 'timestamp': '2025-09-10 03:01:22.378470', 'step': 19993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:22.430319', 'step': 19993, 'epoch': 3} {'type': 'loss', 'content': 0.09011615067720413, 'timestamp': '2025-09-10 03:01:22.432618', 'step': 19994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:22.486528', 'step': 19994, 'epoch': 3} {'type': 'loss', 'content': 0.06671645492315292, 'timestamp': '2025-09-10 03:01:22.488765', 'step': 19995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:22.542278', 'step': 19995, 'epoch': 3} {'type': 'loss', 'content': 0.08276037871837616, 'timestamp': '2025-09-10 03:01:22.548199', 'step': 19996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:22.601210', 'step': 19996, 'epoch': 3} {'type': 'loss', 'content': 0.04704790934920311, 'timestamp': '2025-09-10 03:01:22.603390', 'step': 19997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:22.657359', 'step': 19997, 'epoch': 3} {'type': 'loss', 'content': 0.09777051210403442, 'timestamp': '2025-09-10 03:01:22.659586', 'step': 19998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:22.714109', 'step': 19998, 'epoch': 3} {'type': 'loss', 'content': 0.06695151329040527, 'timestamp': '2025-09-10 03:01:22.716193', 'step': 19999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:22.770144', 'step': 19999, 'epoch': 3} {'type': 'loss', 'content': 0.031641364097595215, 'timestamp': '2025-09-10 03:01:22.776152', 'step': 20000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 20000', 'timestamp': '2025-09-10 03:01:23.129863', 'step': 20000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:23.189814', 'step': 20000, 'epoch': 3} {'type': 'loss', 'content': 0.11647389084100723, 'timestamp': '2025-09-10 03:01:23.191619', 'step': 20001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:23.246673', 'step': 20001, 'epoch': 3} {'type': 'loss', 'content': 0.2052755504846573, 'timestamp': '2025-09-10 03:01:23.248493', 'step': 20002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:23.302246', 'step': 20002, 'epoch': 3} {'type': 'loss', 'content': 0.08168599754571915, 'timestamp': '2025-09-10 03:01:23.304191', 'step': 20003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:23.359016', 'step': 20003, 'epoch': 3} {'type': 'loss', 'content': 0.04730062186717987, 'timestamp': '2025-09-10 03:01:23.364776', 'step': 20004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:23.417251', 'step': 20004, 'epoch': 3} {'type': 'loss', 'content': 0.06719150394201279, 'timestamp': '2025-09-10 03:01:23.419450', 'step': 20005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:23.472054', 'step': 20005, 'epoch': 3} {'type': 'loss', 'content': 0.07196926325559616, 'timestamp': '2025-09-10 03:01:23.474390', 'step': 20006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:23.527417', 'step': 20006, 'epoch': 3} {'type': 'loss', 'content': 0.19940178096294403, 'timestamp': '2025-09-10 03:01:23.529685', 'step': 20007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:23.583134', 'step': 20007, 'epoch': 3} {'type': 'loss', 'content': 0.06420867145061493, 'timestamp': '2025-09-10 03:01:23.589197', 'step': 20008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:23.641801', 'step': 20008, 'epoch': 3} {'type': 'loss', 'content': 0.060661282390356064, 'timestamp': '2025-09-10 03:01:23.644156', 'step': 20009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:23.697206', 'step': 20009, 'epoch': 3} {'type': 'loss', 'content': 0.171034574508667, 'timestamp': '2025-09-10 03:01:23.699207', 'step': 20010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:23.752593', 'step': 20010, 'epoch': 3} {'type': 'loss', 'content': 0.1357860565185547, 'timestamp': '2025-09-10 03:01:23.754514', 'step': 20011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:23.807302', 'step': 20011, 'epoch': 3} {'type': 'loss', 'content': 0.13104218244552612, 'timestamp': '2025-09-10 03:01:23.812928', 'step': 20012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:23.865291', 'step': 20012, 'epoch': 3} {'type': 'loss', 'content': 0.03338811919093132, 'timestamp': '2025-09-10 03:01:23.867161', 'step': 20013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:23.920129', 'step': 20013, 'epoch': 3} {'type': 'loss', 'content': 0.08981627225875854, 'timestamp': '2025-09-10 03:01:23.922006', 'step': 20014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:23.974865', 'step': 20014, 'epoch': 3} {'type': 'loss', 'content': 0.15531301498413086, 'timestamp': '2025-09-10 03:01:23.977308', 'step': 20015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:24.030805', 'step': 20015, 'epoch': 3} {'type': 'loss', 'content': 0.13672494888305664, 'timestamp': '2025-09-10 03:01:24.036630', 'step': 20016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:24.089060', 'step': 20016, 'epoch': 3} {'type': 'loss', 'content': 0.08990231901407242, 'timestamp': '2025-09-10 03:01:24.091439', 'step': 20017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:24.144612', 'step': 20017, 'epoch': 3} {'type': 'loss', 'content': 0.07214821130037308, 'timestamp': '2025-09-10 03:01:24.146865', 'step': 20018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:24.200093', 'step': 20018, 'epoch': 3} {'type': 'loss', 'content': 0.15049001574516296, 'timestamp': '2025-09-10 03:01:24.202091', 'step': 20019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:24.254753', 'step': 20019, 'epoch': 3} {'type': 'loss', 'content': 0.09275513887405396, 'timestamp': '2025-09-10 03:01:24.261219', 'step': 20020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:24.313933', 'step': 20020, 'epoch': 3} {'type': 'loss', 'content': 0.05337369814515114, 'timestamp': '2025-09-10 03:01:24.316117', 'step': 20021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:24.368682', 'step': 20021, 'epoch': 3} {'type': 'loss', 'content': 0.09910430759191513, 'timestamp': '2025-09-10 03:01:24.370656', 'step': 20022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:24.423638', 'step': 20022, 'epoch': 3} {'type': 'loss', 'content': 0.168666273355484, 'timestamp': '2025-09-10 03:01:24.426300', 'step': 20023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:24.480023', 'step': 20023, 'epoch': 3} {'type': 'loss', 'content': 0.14091645181179047, 'timestamp': '2025-09-10 03:01:24.486153', 'step': 20024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:24.538899', 'step': 20024, 'epoch': 3} {'type': 'loss', 'content': 0.08817724138498306, 'timestamp': '2025-09-10 03:01:24.541277', 'step': 20025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:24.594148', 'step': 20025, 'epoch': 3} {'type': 'loss', 'content': 0.03793216496706009, 'timestamp': '2025-09-10 03:01:24.596576', 'step': 20026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:24.651188', 'step': 20026, 'epoch': 3} {'type': 'loss', 'content': 0.09462181478738785, 'timestamp': '2025-09-10 03:01:24.653672', 'step': 20027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:24.706604', 'step': 20027, 'epoch': 3} {'type': 'loss', 'content': 0.08441706001758575, 'timestamp': '2025-09-10 03:01:24.712118', 'step': 20028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:24.764632', 'step': 20028, 'epoch': 3} {'type': 'loss', 'content': 0.146155446767807, 'timestamp': '2025-09-10 03:01:24.766630', 'step': 20029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:24.819543', 'step': 20029, 'epoch': 3} {'type': 'loss', 'content': 0.18128716945648193, 'timestamp': '2025-09-10 03:01:24.821714', 'step': 20030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:24.874968', 'step': 20030, 'epoch': 3} {'type': 'loss', 'content': 0.0762593150138855, 'timestamp': '2025-09-10 03:01:24.877118', 'step': 20031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:24.930912', 'step': 20031, 'epoch': 3} {'type': 'loss', 'content': 0.0937378779053688, 'timestamp': '2025-09-10 03:01:24.936683', 'step': 20032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:24.995437', 'step': 20032, 'epoch': 3} {'type': 'loss', 'content': 0.10561517626047134, 'timestamp': '2025-09-10 03:01:24.997384', 'step': 20033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:25.050188', 'step': 20033, 'epoch': 3} {'type': 'loss', 'content': 0.1458144187927246, 'timestamp': '2025-09-10 03:01:25.052504', 'step': 20034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:25.105203', 'step': 20034, 'epoch': 3} {'type': 'loss', 'content': 0.10241292417049408, 'timestamp': '2025-09-10 03:01:25.107190', 'step': 20035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:25.159656', 'step': 20035, 'epoch': 3} {'type': 'loss', 'content': 0.08185494691133499, 'timestamp': '2025-09-10 03:01:25.165202', 'step': 20036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:25.217280', 'step': 20036, 'epoch': 3} {'type': 'loss', 'content': 0.08924834430217743, 'timestamp': '2025-09-10 03:01:25.219458', 'step': 20037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:25.271939', 'step': 20037, 'epoch': 3} {'type': 'loss', 'content': 0.12102178484201431, 'timestamp': '2025-09-10 03:01:25.274114', 'step': 20038, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 03:01:38.030812', 'step': 20038, 'epoch': 3} {'type': 'pplx', 'content': 10829.56157469135, 'timestamp': '2025-09-10 03:01:38.033844', 'step': 20038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:38.089141', 'step': 20038, 'epoch': 3} {'type': 'loss', 'content': 0.11683078110218048, 'timestamp': '2025-09-10 03:01:38.091488', 'step': 20039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:38.147430', 'step': 20039, 'epoch': 3} {'type': 'loss', 'content': 0.14869344234466553, 'timestamp': '2025-09-10 03:01:38.153793', 'step': 20040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:38.208438', 'step': 20040, 'epoch': 3} {'type': 'loss', 'content': 0.10310061275959015, 'timestamp': '2025-09-10 03:01:38.210798', 'step': 20041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:38.267830', 'step': 20041, 'epoch': 3} {'type': 'loss', 'content': 0.09568490833044052, 'timestamp': '2025-09-10 03:01:38.270059', 'step': 20042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:38.326122', 'step': 20042, 'epoch': 3} {'type': 'loss', 'content': 0.04928664118051529, 'timestamp': '2025-09-10 03:01:38.328509', 'step': 20043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:38.382531', 'step': 20043, 'epoch': 3} {'type': 'loss', 'content': 0.06826993823051453, 'timestamp': '2025-09-10 03:01:38.388749', 'step': 20044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:38.441656', 'step': 20044, 'epoch': 3} {'type': 'loss', 'content': 0.05975167080760002, 'timestamp': '2025-09-10 03:01:38.443881', 'step': 20045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:01:38.497351', 'step': 20045, 'epoch': 3} {'type': 'loss', 'content': 0.09090618789196014, 'timestamp': '2025-09-10 03:01:38.499705', 'step': 20046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:38.552889', 'step': 20046, 'epoch': 3} {'type': 'loss', 'content': 0.051504604518413544, 'timestamp': '2025-09-10 03:01:38.555294', 'step': 20047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:38.608676', 'step': 20047, 'epoch': 3} {'type': 'loss', 'content': 0.11702354997396469, 'timestamp': '2025-09-10 03:01:38.614844', 'step': 20048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:38.667032', 'step': 20048, 'epoch': 3} {'type': 'loss', 'content': 0.05997694656252861, 'timestamp': '2025-09-10 03:01:38.669195', 'step': 20049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:38.722213', 'step': 20049, 'epoch': 3} {'type': 'loss', 'content': 0.0967278704047203, 'timestamp': '2025-09-10 03:01:38.724367', 'step': 20050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:38.776636', 'step': 20050, 'epoch': 3} {'type': 'loss', 'content': 0.03856703266501427, 'timestamp': '2025-09-10 03:01:38.778769', 'step': 20051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:38.831256', 'step': 20051, 'epoch': 3} {'type': 'loss', 'content': 0.05083458870649338, 'timestamp': '2025-09-10 03:01:38.836982', 'step': 20052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:38.889893', 'step': 20052, 'epoch': 3} {'type': 'loss', 'content': 0.08446083962917328, 'timestamp': '2025-09-10 03:01:38.892119', 'step': 20053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:38.946455', 'step': 20053, 'epoch': 3} {'type': 'loss', 'content': 0.0777982547879219, 'timestamp': '2025-09-10 03:01:38.948627', 'step': 20054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:39.003313', 'step': 20054, 'epoch': 3} {'type': 'loss', 'content': 0.06904130429029465, 'timestamp': '2025-09-10 03:01:39.005519', 'step': 20055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:39.058611', 'step': 20055, 'epoch': 3} {'type': 'loss', 'content': 0.136973574757576, 'timestamp': '2025-09-10 03:01:39.064515', 'step': 20056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:39.116979', 'step': 20056, 'epoch': 3} {'type': 'loss', 'content': 0.06674060970544815, 'timestamp': '2025-09-10 03:01:39.119174', 'step': 20057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:39.173256', 'step': 20057, 'epoch': 3} {'type': 'loss', 'content': 0.12394706904888153, 'timestamp': '2025-09-10 03:01:39.175447', 'step': 20058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:39.230276', 'step': 20058, 'epoch': 3} {'type': 'loss', 'content': 0.19682817161083221, 'timestamp': '2025-09-10 03:01:39.232469', 'step': 20059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:39.286000', 'step': 20059, 'epoch': 3} {'type': 'loss', 'content': 0.12252360582351685, 'timestamp': '2025-09-10 03:01:39.291840', 'step': 20060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:39.344697', 'step': 20060, 'epoch': 3} {'type': 'loss', 'content': 0.11854678392410278, 'timestamp': '2025-09-10 03:01:39.347028', 'step': 20061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:39.400013', 'step': 20061, 'epoch': 3} {'type': 'loss', 'content': 0.09386634081602097, 'timestamp': '2025-09-10 03:01:39.402278', 'step': 20062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:39.456355', 'step': 20062, 'epoch': 3} {'type': 'loss', 'content': 0.048147719353437424, 'timestamp': '2025-09-10 03:01:39.458500', 'step': 20063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:39.511908', 'step': 20063, 'epoch': 3} {'type': 'loss', 'content': 0.06971621513366699, 'timestamp': '2025-09-10 03:01:39.517862', 'step': 20064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:39.570763', 'step': 20064, 'epoch': 3} {'type': 'loss', 'content': 0.05438658967614174, 'timestamp': '2025-09-10 03:01:39.572710', 'step': 20065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:39.626940', 'step': 20065, 'epoch': 3} {'type': 'loss', 'content': 0.05881466716527939, 'timestamp': '2025-09-10 03:01:39.629186', 'step': 20066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:39.682278', 'step': 20066, 'epoch': 3} {'type': 'loss', 'content': 0.07580287754535675, 'timestamp': '2025-09-10 03:01:39.684299', 'step': 20067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:39.736914', 'step': 20067, 'epoch': 3} {'type': 'loss', 'content': 0.14000049233436584, 'timestamp': '2025-09-10 03:01:39.742625', 'step': 20068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:39.795294', 'step': 20068, 'epoch': 3} {'type': 'loss', 'content': 0.006994058843702078, 'timestamp': '2025-09-10 03:01:39.797435', 'step': 20069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:39.850285', 'step': 20069, 'epoch': 3} {'type': 'loss', 'content': 0.1211807131767273, 'timestamp': '2025-09-10 03:01:39.852505', 'step': 20070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:39.905099', 'step': 20070, 'epoch': 3} {'type': 'loss', 'content': 0.06694896519184113, 'timestamp': '2025-09-10 03:01:39.907227', 'step': 20071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:39.961729', 'step': 20071, 'epoch': 3} {'type': 'loss', 'content': 0.09859291464090347, 'timestamp': '2025-09-10 03:01:39.967740', 'step': 20072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:40.021635', 'step': 20072, 'epoch': 3} {'type': 'loss', 'content': 0.10208121687173843, 'timestamp': '2025-09-10 03:01:40.023875', 'step': 20073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:40.077157', 'step': 20073, 'epoch': 3} {'type': 'loss', 'content': 0.0832066535949707, 'timestamp': '2025-09-10 03:01:40.079308', 'step': 20074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:40.132326', 'step': 20074, 'epoch': 3} {'type': 'loss', 'content': 0.08422400802373886, 'timestamp': '2025-09-10 03:01:40.134708', 'step': 20075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:40.188078', 'step': 20075, 'epoch': 3} {'type': 'loss', 'content': 0.0876123309135437, 'timestamp': '2025-09-10 03:01:40.194113', 'step': 20076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:40.247719', 'step': 20076, 'epoch': 3} {'type': 'loss', 'content': 0.07448923587799072, 'timestamp': '2025-09-10 03:01:40.250002', 'step': 20077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:40.302630', 'step': 20077, 'epoch': 3} {'type': 'loss', 'content': 0.04609736055135727, 'timestamp': '2025-09-10 03:01:40.304858', 'step': 20078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:40.357723', 'step': 20078, 'epoch': 3} {'type': 'loss', 'content': 0.13424387574195862, 'timestamp': '2025-09-10 03:01:40.359859', 'step': 20079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:40.413827', 'step': 20079, 'epoch': 3} {'type': 'loss', 'content': 0.11406760662794113, 'timestamp': '2025-09-10 03:01:40.419514', 'step': 20080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:40.473090', 'step': 20080, 'epoch': 3} {'type': 'loss', 'content': 0.051344119012355804, 'timestamp': '2025-09-10 03:01:40.475289', 'step': 20081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:40.530390', 'step': 20081, 'epoch': 3} {'type': 'loss', 'content': 0.06981699168682098, 'timestamp': '2025-09-10 03:01:40.532594', 'step': 20082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:40.586154', 'step': 20082, 'epoch': 3} {'type': 'loss', 'content': 0.14056871831417084, 'timestamp': '2025-09-10 03:01:40.588320', 'step': 20083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:40.641198', 'step': 20083, 'epoch': 3} {'type': 'loss', 'content': 0.12909628450870514, 'timestamp': '2025-09-10 03:01:40.647153', 'step': 20084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:40.700160', 'step': 20084, 'epoch': 3} {'type': 'loss', 'content': 0.1430334895849228, 'timestamp': '2025-09-10 03:01:40.702401', 'step': 20085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:40.757172', 'step': 20085, 'epoch': 3} {'type': 'loss', 'content': 0.08430947363376617, 'timestamp': '2025-09-10 03:01:40.759390', 'step': 20086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:40.817165', 'step': 20086, 'epoch': 3} {'type': 'loss', 'content': 0.1349359005689621, 'timestamp': '2025-09-10 03:01:40.819272', 'step': 20087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:40.873462', 'step': 20087, 'epoch': 3} {'type': 'loss', 'content': 0.062108106911182404, 'timestamp': '2025-09-10 03:01:40.880300', 'step': 20088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:40.933250', 'step': 20088, 'epoch': 3} {'type': 'loss', 'content': 0.1923421323299408, 'timestamp': '2025-09-10 03:01:40.935642', 'step': 20089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:40.991759', 'step': 20089, 'epoch': 3} {'type': 'loss', 'content': 0.09915944188833237, 'timestamp': '2025-09-10 03:01:40.997458', 'step': 20090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:41.055558', 'step': 20090, 'epoch': 3} {'type': 'loss', 'content': 0.15797629952430725, 'timestamp': '2025-09-10 03:01:41.059652', 'step': 20091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:41.115185', 'step': 20091, 'epoch': 3} {'type': 'loss', 'content': 0.12601742148399353, 'timestamp': '2025-09-10 03:01:41.121284', 'step': 20092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:41.175230', 'step': 20092, 'epoch': 3} {'type': 'loss', 'content': 0.027985233813524246, 'timestamp': '2025-09-10 03:01:41.180399', 'step': 20093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:41.239985', 'step': 20093, 'epoch': 3} {'type': 'loss', 'content': 0.10199758410453796, 'timestamp': '2025-09-10 03:01:41.242314', 'step': 20094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:01:41.298541', 'step': 20094, 'epoch': 3} {'type': 'loss', 'content': 0.08891690522432327, 'timestamp': '2025-09-10 03:01:41.300790', 'step': 20095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:41.354186', 'step': 20095, 'epoch': 3} {'type': 'loss', 'content': 0.09865253418684006, 'timestamp': '2025-09-10 03:01:41.360184', 'step': 20096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 03:01:41.413416', 'step': 20096, 'epoch': 3} {'type': 'loss', 'content': 0.07419540733098984, 'timestamp': '2025-09-10 03:01:41.418714', 'step': 20097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:41.473971', 'step': 20097, 'epoch': 3} {'type': 'loss', 'content': 0.037672776728868484, 'timestamp': '2025-09-10 03:01:41.476177', 'step': 20098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:41.532736', 'step': 20098, 'epoch': 3} {'type': 'loss', 'content': 0.1587660014629364, 'timestamp': '2025-09-10 03:01:41.534959', 'step': 20099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:41.588979', 'step': 20099, 'epoch': 3} {'type': 'loss', 'content': 0.07549311220645905, 'timestamp': '2025-09-10 03:01:41.595107', 'step': 20100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:41.649616', 'step': 20100, 'epoch': 3} {'type': 'loss', 'content': 0.0913095623254776, 'timestamp': '2025-09-10 03:01:41.651787', 'step': 20101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:41.705023', 'step': 20101, 'epoch': 3} {'type': 'loss', 'content': 0.11624471843242645, 'timestamp': '2025-09-10 03:01:41.707153', 'step': 20102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:41.760902', 'step': 20102, 'epoch': 3} {'type': 'loss', 'content': 0.08519601076841354, 'timestamp': '2025-09-10 03:01:41.763123', 'step': 20103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:41.817017', 'step': 20103, 'epoch': 3} {'type': 'loss', 'content': 0.12031365185976028, 'timestamp': '2025-09-10 03:01:41.823179', 'step': 20104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:41.876003', 'step': 20104, 'epoch': 3} {'type': 'loss', 'content': 0.08545715361833572, 'timestamp': '2025-09-10 03:01:41.878444', 'step': 20105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:41.933624', 'step': 20105, 'epoch': 3} {'type': 'loss', 'content': 0.11611136794090271, 'timestamp': '2025-09-10 03:01:41.935944', 'step': 20106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:41.989357', 'step': 20106, 'epoch': 3} {'type': 'loss', 'content': 0.053149111568927765, 'timestamp': '2025-09-10 03:01:41.991594', 'step': 20107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:42.045313', 'step': 20107, 'epoch': 3} {'type': 'loss', 'content': 0.1500016152858734, 'timestamp': '2025-09-10 03:01:42.051047', 'step': 20108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:42.103985', 'step': 20108, 'epoch': 3} {'type': 'loss', 'content': 0.11919199675321579, 'timestamp': '2025-09-10 03:01:42.106128', 'step': 20109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:42.160375', 'step': 20109, 'epoch': 3} {'type': 'loss', 'content': 0.0587795190513134, 'timestamp': '2025-09-10 03:01:42.162480', 'step': 20110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:42.216303', 'step': 20110, 'epoch': 3} {'type': 'loss', 'content': 0.08384217321872711, 'timestamp': '2025-09-10 03:01:42.218634', 'step': 20111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:42.271897', 'step': 20111, 'epoch': 3} {'type': 'loss', 'content': 0.11924044787883759, 'timestamp': '2025-09-10 03:01:42.277952', 'step': 20112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:42.331350', 'step': 20112, 'epoch': 3} {'type': 'loss', 'content': 0.05675089359283447, 'timestamp': '2025-09-10 03:01:42.333623', 'step': 20113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:42.387109', 'step': 20113, 'epoch': 3} {'type': 'loss', 'content': 0.058862943202257156, 'timestamp': '2025-09-10 03:01:42.389529', 'step': 20114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:42.443912', 'step': 20114, 'epoch': 3} {'type': 'loss', 'content': 0.07235851138830185, 'timestamp': '2025-09-10 03:01:42.446437', 'step': 20115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:42.501140', 'step': 20115, 'epoch': 3} {'type': 'loss', 'content': 0.06512756645679474, 'timestamp': '2025-09-10 03:01:42.507217', 'step': 20116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:42.560393', 'step': 20116, 'epoch': 3} {'type': 'loss', 'content': 0.15656988322734833, 'timestamp': '2025-09-10 03:01:42.562526', 'step': 20117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:42.616316', 'step': 20117, 'epoch': 3} {'type': 'loss', 'content': 0.10972917824983597, 'timestamp': '2025-09-10 03:01:42.618424', 'step': 20118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:42.673677', 'step': 20118, 'epoch': 3} {'type': 'loss', 'content': 0.07241898775100708, 'timestamp': '2025-09-10 03:01:42.676121', 'step': 20119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:42.730759', 'step': 20119, 'epoch': 3} {'type': 'loss', 'content': 0.1685096174478531, 'timestamp': '2025-09-10 03:01:42.736835', 'step': 20120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:42.790366', 'step': 20120, 'epoch': 3} {'type': 'loss', 'content': 0.15695753693580627, 'timestamp': '2025-09-10 03:01:42.792492', 'step': 20121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:42.846459', 'step': 20121, 'epoch': 3} {'type': 'loss', 'content': 0.12256944924592972, 'timestamp': '2025-09-10 03:01:42.848642', 'step': 20122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:01:42.902961', 'step': 20122, 'epoch': 3} {'type': 'loss', 'content': 0.06275762617588043, 'timestamp': '2025-09-10 03:01:42.905295', 'step': 20123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:42.958360', 'step': 20123, 'epoch': 3} {'type': 'loss', 'content': 0.06743315607309341, 'timestamp': '2025-09-10 03:01:42.964294', 'step': 20124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:43.017199', 'step': 20124, 'epoch': 3} {'type': 'loss', 'content': 0.050129055976867676, 'timestamp': '2025-09-10 03:01:43.019218', 'step': 20125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:43.072854', 'step': 20125, 'epoch': 3} {'type': 'loss', 'content': 0.07082383334636688, 'timestamp': '2025-09-10 03:01:43.075032', 'step': 20126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:43.127974', 'step': 20126, 'epoch': 3} {'type': 'loss', 'content': 0.1038300022482872, 'timestamp': '2025-09-10 03:01:43.130333', 'step': 20127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:43.183846', 'step': 20127, 'epoch': 3} {'type': 'loss', 'content': 0.08141884207725525, 'timestamp': '2025-09-10 03:01:43.189901', 'step': 20128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:43.243523', 'step': 20128, 'epoch': 3} {'type': 'loss', 'content': 0.12341183423995972, 'timestamp': '2025-09-10 03:01:43.245810', 'step': 20129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:43.298878', 'step': 20129, 'epoch': 3} {'type': 'loss', 'content': 0.09375297278165817, 'timestamp': '2025-09-10 03:01:43.300928', 'step': 20130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:43.354169', 'step': 20130, 'epoch': 3} {'type': 'loss', 'content': 0.02437693439424038, 'timestamp': '2025-09-10 03:01:43.356329', 'step': 20131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:43.409864', 'step': 20131, 'epoch': 3} {'type': 'loss', 'content': 0.06027659773826599, 'timestamp': '2025-09-10 03:01:43.416010', 'step': 20132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:43.469613', 'step': 20132, 'epoch': 3} {'type': 'loss', 'content': 0.059856437146663666, 'timestamp': '2025-09-10 03:01:43.471897', 'step': 20133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:43.525820', 'step': 20133, 'epoch': 3} {'type': 'loss', 'content': 0.13944518566131592, 'timestamp': '2025-09-10 03:01:43.528087', 'step': 20134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:43.581433', 'step': 20134, 'epoch': 3} {'type': 'loss', 'content': 0.06207876279950142, 'timestamp': '2025-09-10 03:01:43.583575', 'step': 20135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:43.637969', 'step': 20135, 'epoch': 3} {'type': 'loss', 'content': 0.1277049034833908, 'timestamp': '2025-09-10 03:01:43.643761', 'step': 20136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:43.698399', 'step': 20136, 'epoch': 3} {'type': 'loss', 'content': 0.06085849553346634, 'timestamp': '2025-09-10 03:01:43.700576', 'step': 20137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:43.755204', 'step': 20137, 'epoch': 3} {'type': 'loss', 'content': 0.0695442482829094, 'timestamp': '2025-09-10 03:01:43.757257', 'step': 20138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:43.810776', 'step': 20138, 'epoch': 3} {'type': 'loss', 'content': 0.09168311208486557, 'timestamp': '2025-09-10 03:01:43.812954', 'step': 20139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:43.867128', 'step': 20139, 'epoch': 3} {'type': 'loss', 'content': 0.1027335673570633, 'timestamp': '2025-09-10 03:01:43.873171', 'step': 20140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:43.926157', 'step': 20140, 'epoch': 3} {'type': 'loss', 'content': 0.16479195654392242, 'timestamp': '2025-09-10 03:01:43.928474', 'step': 20141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:43.983064', 'step': 20141, 'epoch': 3} {'type': 'loss', 'content': 0.12638191878795624, 'timestamp': '2025-09-10 03:01:43.985200', 'step': 20142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:44.039552', 'step': 20142, 'epoch': 3} {'type': 'loss', 'content': 0.018396377563476562, 'timestamp': '2025-09-10 03:01:44.041760', 'step': 20143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:44.095962', 'step': 20143, 'epoch': 3} {'type': 'loss', 'content': 0.10369085520505905, 'timestamp': '2025-09-10 03:01:44.101609', 'step': 20144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:44.155432', 'step': 20144, 'epoch': 3} {'type': 'loss', 'content': 0.04058942571282387, 'timestamp': '2025-09-10 03:01:44.157603', 'step': 20145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:44.212686', 'step': 20145, 'epoch': 3} {'type': 'loss', 'content': 0.017498917877674103, 'timestamp': '2025-09-10 03:01:44.214762', 'step': 20146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:44.269407', 'step': 20146, 'epoch': 3} {'type': 'loss', 'content': 0.11281728744506836, 'timestamp': '2025-09-10 03:01:44.271804', 'step': 20147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:44.326541', 'step': 20147, 'epoch': 3} {'type': 'loss', 'content': 0.0503709502518177, 'timestamp': '2025-09-10 03:01:44.332493', 'step': 20148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:44.385293', 'step': 20148, 'epoch': 3} {'type': 'loss', 'content': 0.09912168234586716, 'timestamp': '2025-09-10 03:01:44.387305', 'step': 20149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:44.440832', 'step': 20149, 'epoch': 3} {'type': 'loss', 'content': 0.10771361738443375, 'timestamp': '2025-09-10 03:01:44.443323', 'step': 20150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:44.496770', 'step': 20150, 'epoch': 3} {'type': 'loss', 'content': 0.026087764650583267, 'timestamp': '2025-09-10 03:01:44.499342', 'step': 20151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:44.554072', 'step': 20151, 'epoch': 3} {'type': 'loss', 'content': 0.04612884297966957, 'timestamp': '2025-09-10 03:01:44.559851', 'step': 20152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:44.611892', 'step': 20152, 'epoch': 3} {'type': 'loss', 'content': 0.024040495976805687, 'timestamp': '2025-09-10 03:01:44.614005', 'step': 20153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:44.666749', 'step': 20153, 'epoch': 3} {'type': 'loss', 'content': 0.12443467974662781, 'timestamp': '2025-09-10 03:01:44.668954', 'step': 20154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:44.722607', 'step': 20154, 'epoch': 3} {'type': 'loss', 'content': 0.02085432969033718, 'timestamp': '2025-09-10 03:01:44.724738', 'step': 20155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:44.778175', 'step': 20155, 'epoch': 3} {'type': 'loss', 'content': 0.08341860771179199, 'timestamp': '2025-09-10 03:01:44.784446', 'step': 20156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:44.839047', 'step': 20156, 'epoch': 3} {'type': 'loss', 'content': 0.057319484651088715, 'timestamp': '2025-09-10 03:01:44.841311', 'step': 20157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:44.895004', 'step': 20157, 'epoch': 3} {'type': 'loss', 'content': 0.1375756859779358, 'timestamp': '2025-09-10 03:01:44.897142', 'step': 20158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:44.951085', 'step': 20158, 'epoch': 3} {'type': 'loss', 'content': 0.08462126553058624, 'timestamp': '2025-09-10 03:01:44.953285', 'step': 20159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:45.007012', 'step': 20159, 'epoch': 3} {'type': 'loss', 'content': 0.12446427345275879, 'timestamp': '2025-09-10 03:01:45.012874', 'step': 20160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:45.065507', 'step': 20160, 'epoch': 3} {'type': 'loss', 'content': 0.07748367637395859, 'timestamp': '2025-09-10 03:01:45.067748', 'step': 20161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:45.134026', 'step': 20161, 'epoch': 3} {'type': 'loss', 'content': 0.07141385972499847, 'timestamp': '2025-09-10 03:01:45.136340', 'step': 20162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:45.189347', 'step': 20162, 'epoch': 3} {'type': 'loss', 'content': 0.04804150015115738, 'timestamp': '2025-09-10 03:01:45.191499', 'step': 20163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:45.244720', 'step': 20163, 'epoch': 3} {'type': 'loss', 'content': 0.10496298968791962, 'timestamp': '2025-09-10 03:01:45.250422', 'step': 20164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:45.304463', 'step': 20164, 'epoch': 3} {'type': 'loss', 'content': 0.048837222158908844, 'timestamp': '2025-09-10 03:01:45.306633', 'step': 20165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:45.361385', 'step': 20165, 'epoch': 3} {'type': 'loss', 'content': 0.06721507757902145, 'timestamp': '2025-09-10 03:01:45.363547', 'step': 20166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:45.417553', 'step': 20166, 'epoch': 3} {'type': 'loss', 'content': 0.06045640632510185, 'timestamp': '2025-09-10 03:01:45.419760', 'step': 20167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:45.472881', 'step': 20167, 'epoch': 3} {'type': 'loss', 'content': 0.10135898739099503, 'timestamp': '2025-09-10 03:01:45.478826', 'step': 20168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:45.531779', 'step': 20168, 'epoch': 3} {'type': 'loss', 'content': 0.04679008200764656, 'timestamp': '2025-09-10 03:01:45.535185', 'step': 20169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:45.589410', 'step': 20169, 'epoch': 3} {'type': 'loss', 'content': 0.11419933289289474, 'timestamp': '2025-09-10 03:01:45.591553', 'step': 20170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:45.644617', 'step': 20170, 'epoch': 3} {'type': 'loss', 'content': 0.06020830571651459, 'timestamp': '2025-09-10 03:01:45.646774', 'step': 20171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:45.700507', 'step': 20171, 'epoch': 3} {'type': 'loss', 'content': 0.08415357023477554, 'timestamp': '2025-09-10 03:01:45.706253', 'step': 20172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:45.761836', 'step': 20172, 'epoch': 3} {'type': 'loss', 'content': 0.13104493916034698, 'timestamp': '2025-09-10 03:01:45.763960', 'step': 20173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:45.817579', 'step': 20173, 'epoch': 3} {'type': 'loss', 'content': 0.09720548242330551, 'timestamp': '2025-09-10 03:01:45.819700', 'step': 20174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:45.874065', 'step': 20174, 'epoch': 3} {'type': 'loss', 'content': 0.08996479958295822, 'timestamp': '2025-09-10 03:01:45.876410', 'step': 20175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:45.931491', 'step': 20175, 'epoch': 3} {'type': 'loss', 'content': 0.10840529203414917, 'timestamp': '2025-09-10 03:01:45.937590', 'step': 20176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:45.990539', 'step': 20176, 'epoch': 3} {'type': 'loss', 'content': 0.09387332201004028, 'timestamp': '2025-09-10 03:01:45.992661', 'step': 20177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:46.045625', 'step': 20177, 'epoch': 3} {'type': 'loss', 'content': 0.05640876293182373, 'timestamp': '2025-09-10 03:01:46.047769', 'step': 20178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:46.101233', 'step': 20178, 'epoch': 3} {'type': 'loss', 'content': 0.038942039012908936, 'timestamp': '2025-09-10 03:01:46.103418', 'step': 20179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:46.157410', 'step': 20179, 'epoch': 3} {'type': 'loss', 'content': 0.043932683765888214, 'timestamp': '2025-09-10 03:01:46.163228', 'step': 20180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:46.215512', 'step': 20180, 'epoch': 3} {'type': 'loss', 'content': 0.041336286813020706, 'timestamp': '2025-09-10 03:01:46.217686', 'step': 20181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:46.270792', 'step': 20181, 'epoch': 3} {'type': 'loss', 'content': 0.047701988369226456, 'timestamp': '2025-09-10 03:01:46.272942', 'step': 20182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:46.326029', 'step': 20182, 'epoch': 3} {'type': 'loss', 'content': 0.05321402847766876, 'timestamp': '2025-09-10 03:01:46.328199', 'step': 20183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:46.383113', 'step': 20183, 'epoch': 3} {'type': 'loss', 'content': 0.05729355290532112, 'timestamp': '2025-09-10 03:01:46.388959', 'step': 20184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:46.443498', 'step': 20184, 'epoch': 3} {'type': 'loss', 'content': 0.13579565286636353, 'timestamp': '2025-09-10 03:01:46.445629', 'step': 20185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:46.499153', 'step': 20185, 'epoch': 3} {'type': 'loss', 'content': 0.08994434028863907, 'timestamp': '2025-09-10 03:01:46.501285', 'step': 20186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:46.561569', 'step': 20186, 'epoch': 3} {'type': 'loss', 'content': 0.08768083900213242, 'timestamp': '2025-09-10 03:01:46.563743', 'step': 20187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:46.617391', 'step': 20187, 'epoch': 3} {'type': 'loss', 'content': 0.021595947444438934, 'timestamp': '2025-09-10 03:01:46.623256', 'step': 20188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:46.676799', 'step': 20188, 'epoch': 3} {'type': 'loss', 'content': 0.12968692183494568, 'timestamp': '2025-09-10 03:01:46.678917', 'step': 20189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:46.732210', 'step': 20189, 'epoch': 3} {'type': 'loss', 'content': 0.09242965281009674, 'timestamp': '2025-09-10 03:01:46.734539', 'step': 20190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:46.787919', 'step': 20190, 'epoch': 3} {'type': 'loss', 'content': 0.11298087984323502, 'timestamp': '2025-09-10 03:01:46.790304', 'step': 20191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:46.843961', 'step': 20191, 'epoch': 3} {'type': 'loss', 'content': 0.02549237199127674, 'timestamp': '2025-09-10 03:01:46.849776', 'step': 20192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:46.902926', 'step': 20192, 'epoch': 3} {'type': 'loss', 'content': 0.1619848608970642, 'timestamp': '2025-09-10 03:01:46.905064', 'step': 20193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:46.958325', 'step': 20193, 'epoch': 3} {'type': 'loss', 'content': 0.15961794555187225, 'timestamp': '2025-09-10 03:01:46.960619', 'step': 20194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:47.013881', 'step': 20194, 'epoch': 3} {'type': 'loss', 'content': 0.15248407423496246, 'timestamp': '2025-09-10 03:01:47.016058', 'step': 20195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:47.069232', 'step': 20195, 'epoch': 3} {'type': 'loss', 'content': 0.06872402131557465, 'timestamp': '2025-09-10 03:01:47.074986', 'step': 20196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:47.127115', 'step': 20196, 'epoch': 3} {'type': 'loss', 'content': 0.03645911440253258, 'timestamp': '2025-09-10 03:01:47.129268', 'step': 20197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:47.183622', 'step': 20197, 'epoch': 3} {'type': 'loss', 'content': 0.09264212101697922, 'timestamp': '2025-09-10 03:01:47.185786', 'step': 20198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:47.240176', 'step': 20198, 'epoch': 3} {'type': 'loss', 'content': 0.22216206789016724, 'timestamp': '2025-09-10 03:01:47.242344', 'step': 20199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:47.298529', 'step': 20199, 'epoch': 3} {'type': 'loss', 'content': 0.04739201068878174, 'timestamp': '2025-09-10 03:01:47.304408', 'step': 20200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:47.357834', 'step': 20200, 'epoch': 3} {'type': 'loss', 'content': 0.11842101067304611, 'timestamp': '2025-09-10 03:01:47.360118', 'step': 20201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:47.413524', 'step': 20201, 'epoch': 3} {'type': 'loss', 'content': 0.11917919665575027, 'timestamp': '2025-09-10 03:01:47.415660', 'step': 20202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:47.470745', 'step': 20202, 'epoch': 3} {'type': 'loss', 'content': 0.06858435273170471, 'timestamp': '2025-09-10 03:01:47.472945', 'step': 20203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:47.527109', 'step': 20203, 'epoch': 3} {'type': 'loss', 'content': 0.05770640820264816, 'timestamp': '2025-09-10 03:01:47.533213', 'step': 20204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:47.586942', 'step': 20204, 'epoch': 3} {'type': 'loss', 'content': 0.09833145141601562, 'timestamp': '2025-09-10 03:01:47.589199', 'step': 20205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:47.642697', 'step': 20205, 'epoch': 3} {'type': 'loss', 'content': 0.16908113658428192, 'timestamp': '2025-09-10 03:01:47.645283', 'step': 20206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:47.698523', 'step': 20206, 'epoch': 3} {'type': 'loss', 'content': 0.07330591231584549, 'timestamp': '2025-09-10 03:01:47.700811', 'step': 20207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:47.754644', 'step': 20207, 'epoch': 3} {'type': 'loss', 'content': 0.07145180553197861, 'timestamp': '2025-09-10 03:01:47.760534', 'step': 20208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:47.813464', 'step': 20208, 'epoch': 3} {'type': 'loss', 'content': 0.09255465865135193, 'timestamp': '2025-09-10 03:01:47.815689', 'step': 20209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:47.868701', 'step': 20209, 'epoch': 3} {'type': 'loss', 'content': 0.086031474173069, 'timestamp': '2025-09-10 03:01:47.870900', 'step': 20210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:47.924606', 'step': 20210, 'epoch': 3} {'type': 'loss', 'content': 0.06465369462966919, 'timestamp': '2025-09-10 03:01:47.926875', 'step': 20211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:47.980148', 'step': 20211, 'epoch': 3} {'type': 'loss', 'content': 0.10271167755126953, 'timestamp': '2025-09-10 03:01:47.986008', 'step': 20212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:48.039285', 'step': 20212, 'epoch': 3} {'type': 'loss', 'content': 0.18052318692207336, 'timestamp': '2025-09-10 03:01:48.041550', 'step': 20213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:48.094718', 'step': 20213, 'epoch': 3} {'type': 'loss', 'content': 0.04470744729042053, 'timestamp': '2025-09-10 03:01:48.096981', 'step': 20214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:48.149968', 'step': 20214, 'epoch': 3} {'type': 'loss', 'content': 0.04522756114602089, 'timestamp': '2025-09-10 03:01:48.152190', 'step': 20215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:48.206172', 'step': 20215, 'epoch': 3} {'type': 'loss', 'content': 0.0891047865152359, 'timestamp': '2025-09-10 03:01:48.212137', 'step': 20216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:48.265033', 'step': 20216, 'epoch': 3} {'type': 'loss', 'content': 0.15664611756801605, 'timestamp': '2025-09-10 03:01:48.267233', 'step': 20217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:48.320385', 'step': 20217, 'epoch': 3} {'type': 'loss', 'content': 0.12234631925821304, 'timestamp': '2025-09-10 03:01:48.322776', 'step': 20218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:48.376610', 'step': 20218, 'epoch': 3} {'type': 'loss', 'content': 0.09194814413785934, 'timestamp': '2025-09-10 03:01:48.379057', 'step': 20219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:48.432832', 'step': 20219, 'epoch': 3} {'type': 'loss', 'content': 0.07438261806964874, 'timestamp': '2025-09-10 03:01:48.438795', 'step': 20220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:48.499050', 'step': 20220, 'epoch': 3} {'type': 'loss', 'content': 0.09306885302066803, 'timestamp': '2025-09-10 03:01:48.501233', 'step': 20221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:48.554733', 'step': 20221, 'epoch': 3} {'type': 'loss', 'content': 0.0866491049528122, 'timestamp': '2025-09-10 03:01:48.557027', 'step': 20222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:48.610621', 'step': 20222, 'epoch': 3} {'type': 'loss', 'content': 0.12265074253082275, 'timestamp': '2025-09-10 03:01:48.612919', 'step': 20223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:48.666192', 'step': 20223, 'epoch': 3} {'type': 'loss', 'content': 0.04184268042445183, 'timestamp': '2025-09-10 03:01:48.672176', 'step': 20224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:48.725411', 'step': 20224, 'epoch': 3} {'type': 'loss', 'content': 0.03864219784736633, 'timestamp': '2025-09-10 03:01:48.727690', 'step': 20225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:48.780335', 'step': 20225, 'epoch': 3} {'type': 'loss', 'content': 0.0909959226846695, 'timestamp': '2025-09-10 03:01:48.782547', 'step': 20226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:48.836157', 'step': 20226, 'epoch': 3} {'type': 'loss', 'content': 0.09329960495233536, 'timestamp': '2025-09-10 03:01:48.838956', 'step': 20227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:48.891532', 'step': 20227, 'epoch': 3} {'type': 'loss', 'content': 0.09517212212085724, 'timestamp': '2025-09-10 03:01:48.897397', 'step': 20228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:48.950311', 'step': 20228, 'epoch': 3} {'type': 'loss', 'content': 0.12327045947313309, 'timestamp': '2025-09-10 03:01:48.952604', 'step': 20229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:49.006745', 'step': 20229, 'epoch': 3} {'type': 'loss', 'content': 0.07366561889648438, 'timestamp': '2025-09-10 03:01:49.009027', 'step': 20230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:49.063593', 'step': 20230, 'epoch': 3} {'type': 'loss', 'content': 0.04327256977558136, 'timestamp': '2025-09-10 03:01:49.065859', 'step': 20231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:49.119945', 'step': 20231, 'epoch': 3} {'type': 'loss', 'content': 0.10363958030939102, 'timestamp': '2025-09-10 03:01:49.125943', 'step': 20232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:49.180198', 'step': 20232, 'epoch': 3} {'type': 'loss', 'content': 0.13606233894824982, 'timestamp': '2025-09-10 03:01:49.182513', 'step': 20233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:49.235839', 'step': 20233, 'epoch': 3} {'type': 'loss', 'content': 0.23050500452518463, 'timestamp': '2025-09-10 03:01:49.238234', 'step': 20234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:49.291220', 'step': 20234, 'epoch': 3} {'type': 'loss', 'content': 0.0712551474571228, 'timestamp': '2025-09-10 03:01:49.293476', 'step': 20235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:49.347003', 'step': 20235, 'epoch': 3} {'type': 'loss', 'content': 0.15762735903263092, 'timestamp': '2025-09-10 03:01:49.352852', 'step': 20236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:49.405710', 'step': 20236, 'epoch': 3} {'type': 'loss', 'content': 0.04832577332854271, 'timestamp': '2025-09-10 03:01:49.408013', 'step': 20237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:49.461732', 'step': 20237, 'epoch': 3} {'type': 'loss', 'content': 0.05604810640215874, 'timestamp': '2025-09-10 03:01:49.464106', 'step': 20238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:49.517380', 'step': 20238, 'epoch': 3} {'type': 'loss', 'content': 0.07984215766191483, 'timestamp': '2025-09-10 03:01:49.519610', 'step': 20239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:49.573732', 'step': 20239, 'epoch': 3} {'type': 'loss', 'content': 0.10625188052654266, 'timestamp': '2025-09-10 03:01:49.579670', 'step': 20240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:49.633229', 'step': 20240, 'epoch': 3} {'type': 'loss', 'content': 0.07157295197248459, 'timestamp': '2025-09-10 03:01:49.635508', 'step': 20241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:49.690012', 'step': 20241, 'epoch': 3} {'type': 'loss', 'content': 0.07050244510173798, 'timestamp': '2025-09-10 03:01:49.692342', 'step': 20242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:49.745701', 'step': 20242, 'epoch': 3} {'type': 'loss', 'content': 0.06503909081220627, 'timestamp': '2025-09-10 03:01:49.747953', 'step': 20243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:49.801478', 'step': 20243, 'epoch': 3} {'type': 'loss', 'content': 0.17697200179100037, 'timestamp': '2025-09-10 03:01:49.807424', 'step': 20244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:49.859593', 'step': 20244, 'epoch': 3} {'type': 'loss', 'content': 0.08886291831731796, 'timestamp': '2025-09-10 03:01:49.861835', 'step': 20245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:49.915386', 'step': 20245, 'epoch': 3} {'type': 'loss', 'content': 0.06949630379676819, 'timestamp': '2025-09-10 03:01:49.917591', 'step': 20246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:49.970747', 'step': 20246, 'epoch': 3} {'type': 'loss', 'content': 0.06910340487957001, 'timestamp': '2025-09-10 03:01:49.973087', 'step': 20247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:50.026237', 'step': 20247, 'epoch': 3} {'type': 'loss', 'content': 0.08878283947706223, 'timestamp': '2025-09-10 03:01:50.032295', 'step': 20248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:50.084781', 'step': 20248, 'epoch': 3} {'type': 'loss', 'content': 0.058679163455963135, 'timestamp': '2025-09-10 03:01:50.087011', 'step': 20249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 03:01:50.141859', 'step': 20249, 'epoch': 3} {'type': 'loss', 'content': 0.0717921033501625, 'timestamp': '2025-09-10 03:01:50.144117', 'step': 20250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:50.198716', 'step': 20250, 'epoch': 3} {'type': 'loss', 'content': 0.07604173570871353, 'timestamp': '2025-09-10 03:01:50.200979', 'step': 20251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:50.255715', 'step': 20251, 'epoch': 3} {'type': 'loss', 'content': 0.10723327100276947, 'timestamp': '2025-09-10 03:01:50.261683', 'step': 20252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:50.314715', 'step': 20252, 'epoch': 3} {'type': 'loss', 'content': 0.10374100506305695, 'timestamp': '2025-09-10 03:01:50.317011', 'step': 20253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:50.369947', 'step': 20253, 'epoch': 3} {'type': 'loss', 'content': 0.0063046361319720745, 'timestamp': '2025-09-10 03:01:50.372168', 'step': 20254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:50.425889', 'step': 20254, 'epoch': 3} {'type': 'loss', 'content': 0.1355120688676834, 'timestamp': '2025-09-10 03:01:50.428290', 'step': 20255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:50.482339', 'step': 20255, 'epoch': 3} {'type': 'loss', 'content': 0.11186385899782181, 'timestamp': '2025-09-10 03:01:50.488333', 'step': 20256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:50.541289', 'step': 20256, 'epoch': 3} {'type': 'loss', 'content': 0.05901910364627838, 'timestamp': '2025-09-10 03:01:50.543591', 'step': 20257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:50.596227', 'step': 20257, 'epoch': 3} {'type': 'loss', 'content': 0.07896021753549576, 'timestamp': '2025-09-10 03:01:50.598449', 'step': 20258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:50.653913', 'step': 20258, 'epoch': 3} {'type': 'loss', 'content': 0.09985711425542831, 'timestamp': '2025-09-10 03:01:50.656193', 'step': 20259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:50.710598', 'step': 20259, 'epoch': 3} {'type': 'loss', 'content': 0.07520078867673874, 'timestamp': '2025-09-10 03:01:50.716600', 'step': 20260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:50.770016', 'step': 20260, 'epoch': 3} {'type': 'loss', 'content': 0.04752258583903313, 'timestamp': '2025-09-10 03:01:50.772388', 'step': 20261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:50.825940', 'step': 20261, 'epoch': 3} {'type': 'loss', 'content': 0.014477604068815708, 'timestamp': '2025-09-10 03:01:50.828324', 'step': 20262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:50.882136', 'step': 20262, 'epoch': 3} {'type': 'loss', 'content': 0.046801213175058365, 'timestamp': '2025-09-10 03:01:50.884331', 'step': 20263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:50.937614', 'step': 20263, 'epoch': 3} {'type': 'loss', 'content': 0.11472078412771225, 'timestamp': '2025-09-10 03:01:50.943612', 'step': 20264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:50.996392', 'step': 20264, 'epoch': 3} {'type': 'loss', 'content': 0.09276211261749268, 'timestamp': '2025-09-10 03:01:50.998635', 'step': 20265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:51.053354', 'step': 20265, 'epoch': 3} {'type': 'loss', 'content': 0.12628011405467987, 'timestamp': '2025-09-10 03:01:51.055590', 'step': 20266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:51.108166', 'step': 20266, 'epoch': 3} {'type': 'loss', 'content': 0.10336387902498245, 'timestamp': '2025-09-10 03:01:51.110405', 'step': 20267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:51.163610', 'step': 20267, 'epoch': 3} {'type': 'loss', 'content': 0.10645550489425659, 'timestamp': '2025-09-10 03:01:51.169415', 'step': 20268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:51.222480', 'step': 20268, 'epoch': 3} {'type': 'loss', 'content': 0.10050957649946213, 'timestamp': '2025-09-10 03:01:51.224774', 'step': 20269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:51.278445', 'step': 20269, 'epoch': 3} {'type': 'loss', 'content': 0.12559595704078674, 'timestamp': '2025-09-10 03:01:51.280578', 'step': 20270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:51.333621', 'step': 20270, 'epoch': 3} {'type': 'loss', 'content': 0.10430026054382324, 'timestamp': '2025-09-10 03:01:51.335768', 'step': 20271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:51.390062', 'step': 20271, 'epoch': 3} {'type': 'loss', 'content': 0.1287512332201004, 'timestamp': '2025-09-10 03:01:51.395936', 'step': 20272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:51.448496', 'step': 20272, 'epoch': 3} {'type': 'loss', 'content': 0.0674031674861908, 'timestamp': '2025-09-10 03:01:51.450660', 'step': 20273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:51.503521', 'step': 20273, 'epoch': 3} {'type': 'loss', 'content': 0.07334093004465103, 'timestamp': '2025-09-10 03:01:51.505756', 'step': 20274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:51.558620', 'step': 20274, 'epoch': 3} {'type': 'loss', 'content': 0.1041504517197609, 'timestamp': '2025-09-10 03:01:51.560693', 'step': 20275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:51.613432', 'step': 20275, 'epoch': 3} {'type': 'loss', 'content': 0.13085679709911346, 'timestamp': '2025-09-10 03:01:51.619465', 'step': 20276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:51.672596', 'step': 20276, 'epoch': 3} {'type': 'loss', 'content': 0.08738605678081512, 'timestamp': '2025-09-10 03:01:51.675005', 'step': 20277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:51.728973', 'step': 20277, 'epoch': 3} {'type': 'loss', 'content': 0.08335105329751968, 'timestamp': '2025-09-10 03:01:51.732844', 'step': 20278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:51.787384', 'step': 20278, 'epoch': 3} {'type': 'loss', 'content': 0.04416259750723839, 'timestamp': '2025-09-10 03:01:51.789708', 'step': 20279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:51.842671', 'step': 20279, 'epoch': 3} {'type': 'loss', 'content': 0.10768343508243561, 'timestamp': '2025-09-10 03:01:51.848727', 'step': 20280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:51.901467', 'step': 20280, 'epoch': 3} {'type': 'loss', 'content': 0.054234590381383896, 'timestamp': '2025-09-10 03:01:51.903703', 'step': 20281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:51.956756', 'step': 20281, 'epoch': 3} {'type': 'loss', 'content': 0.0716889277100563, 'timestamp': '2025-09-10 03:01:51.959136', 'step': 20282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:52.011768', 'step': 20282, 'epoch': 3} {'type': 'loss', 'content': 0.11068985611200333, 'timestamp': '2025-09-10 03:01:52.014012', 'step': 20283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:52.066816', 'step': 20283, 'epoch': 3} {'type': 'loss', 'content': 0.11180005222558975, 'timestamp': '2025-09-10 03:01:52.072711', 'step': 20284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:52.125314', 'step': 20284, 'epoch': 3} {'type': 'loss', 'content': 0.06351704150438309, 'timestamp': '2025-09-10 03:01:52.127511', 'step': 20285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:52.180620', 'step': 20285, 'epoch': 3} {'type': 'loss', 'content': 0.1188245415687561, 'timestamp': '2025-09-10 03:01:52.182834', 'step': 20286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:52.236450', 'step': 20286, 'epoch': 3} {'type': 'loss', 'content': 0.043886877596378326, 'timestamp': '2025-09-10 03:01:52.239015', 'step': 20287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:52.293950', 'step': 20287, 'epoch': 3} {'type': 'loss', 'content': 0.048918209969997406, 'timestamp': '2025-09-10 03:01:52.299991', 'step': 20288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:52.353209', 'step': 20288, 'epoch': 3} {'type': 'loss', 'content': 0.0867624431848526, 'timestamp': '2025-09-10 03:01:52.355506', 'step': 20289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:52.409923', 'step': 20289, 'epoch': 3} {'type': 'loss', 'content': 0.08029376715421677, 'timestamp': '2025-09-10 03:01:52.412189', 'step': 20290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:01:52.465457', 'step': 20290, 'epoch': 3} {'type': 'loss', 'content': 0.06175617873668671, 'timestamp': '2025-09-10 03:01:52.467991', 'step': 20291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:52.521256', 'step': 20291, 'epoch': 3} {'type': 'loss', 'content': 0.1411249190568924, 'timestamp': '2025-09-10 03:01:52.527152', 'step': 20292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:52.579804', 'step': 20292, 'epoch': 3} {'type': 'loss', 'content': 0.06204194575548172, 'timestamp': '2025-09-10 03:01:52.582016', 'step': 20293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:52.634833', 'step': 20293, 'epoch': 3} {'type': 'loss', 'content': 0.08594717085361481, 'timestamp': '2025-09-10 03:01:52.637002', 'step': 20294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:52.690254', 'step': 20294, 'epoch': 3} {'type': 'loss', 'content': 0.05422695726156235, 'timestamp': '2025-09-10 03:01:52.692351', 'step': 20295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:52.745844', 'step': 20295, 'epoch': 3} {'type': 'loss', 'content': 0.0193350650370121, 'timestamp': '2025-09-10 03:01:52.751463', 'step': 20296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:52.804728', 'step': 20296, 'epoch': 3} {'type': 'loss', 'content': 0.060235582292079926, 'timestamp': '2025-09-10 03:01:52.807017', 'step': 20297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:52.861271', 'step': 20297, 'epoch': 3} {'type': 'loss', 'content': 0.05534238740801811, 'timestamp': '2025-09-10 03:01:52.865028', 'step': 20298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:52.919236', 'step': 20298, 'epoch': 3} {'type': 'loss', 'content': 0.1196509301662445, 'timestamp': '2025-09-10 03:01:52.921488', 'step': 20299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:52.975095', 'step': 20299, 'epoch': 3} {'type': 'loss', 'content': 0.03745649755001068, 'timestamp': '2025-09-10 03:01:52.981068', 'step': 20300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:53.036454', 'step': 20300, 'epoch': 3} {'type': 'loss', 'content': 0.09089531004428864, 'timestamp': '2025-09-10 03:01:53.038757', 'step': 20301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:53.097771', 'step': 20301, 'epoch': 3} {'type': 'loss', 'content': 0.08446856588125229, 'timestamp': '2025-09-10 03:01:53.100049', 'step': 20302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:53.155345', 'step': 20302, 'epoch': 3} {'type': 'loss', 'content': 0.05620000883936882, 'timestamp': '2025-09-10 03:01:53.157398', 'step': 20303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:53.212868', 'step': 20303, 'epoch': 3} {'type': 'loss', 'content': 0.07077546417713165, 'timestamp': '2025-09-10 03:01:53.219666', 'step': 20304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:53.274222', 'step': 20304, 'epoch': 3} {'type': 'loss', 'content': 0.06686782091856003, 'timestamp': '2025-09-10 03:01:53.276696', 'step': 20305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:53.330316', 'step': 20305, 'epoch': 3} {'type': 'loss', 'content': 0.029636073857545853, 'timestamp': '2025-09-10 03:01:53.332563', 'step': 20306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:53.386490', 'step': 20306, 'epoch': 3} {'type': 'loss', 'content': 0.08415499329566956, 'timestamp': '2025-09-10 03:01:53.388777', 'step': 20307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:53.442515', 'step': 20307, 'epoch': 3} {'type': 'loss', 'content': 0.12055328488349915, 'timestamp': '2025-09-10 03:01:53.448759', 'step': 20308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:53.501781', 'step': 20308, 'epoch': 3} {'type': 'loss', 'content': 0.09475109726190567, 'timestamp': '2025-09-10 03:01:53.503956', 'step': 20309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:53.558558', 'step': 20309, 'epoch': 3} {'type': 'loss', 'content': 0.15121744573116302, 'timestamp': '2025-09-10 03:01:53.560763', 'step': 20310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:53.613820', 'step': 20310, 'epoch': 3} {'type': 'loss', 'content': 0.07478012889623642, 'timestamp': '2025-09-10 03:01:53.616069', 'step': 20311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:53.670864', 'step': 20311, 'epoch': 3} {'type': 'loss', 'content': 0.15667764842510223, 'timestamp': '2025-09-10 03:01:53.678440', 'step': 20312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:53.732239', 'step': 20312, 'epoch': 3} {'type': 'loss', 'content': 0.05007309466600418, 'timestamp': '2025-09-10 03:01:53.735209', 'step': 20313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:53.789065', 'step': 20313, 'epoch': 3} {'type': 'loss', 'content': 0.05908270925283432, 'timestamp': '2025-09-10 03:01:53.791221', 'step': 20314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:53.844032', 'step': 20314, 'epoch': 3} {'type': 'loss', 'content': 0.1264607310295105, 'timestamp': '2025-09-10 03:01:53.846540', 'step': 20315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:53.899579', 'step': 20315, 'epoch': 3} {'type': 'loss', 'content': 0.09989897161722183, 'timestamp': '2025-09-10 03:01:53.905706', 'step': 20316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:53.963264', 'step': 20316, 'epoch': 3} {'type': 'loss', 'content': 0.09633719176054001, 'timestamp': '2025-09-10 03:01:53.965554', 'step': 20317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:54.026081', 'step': 20317, 'epoch': 3} {'type': 'loss', 'content': 0.1564837247133255, 'timestamp': '2025-09-10 03:01:54.028311', 'step': 20318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:54.081292', 'step': 20318, 'epoch': 3} {'type': 'loss', 'content': 0.13114044070243835, 'timestamp': '2025-09-10 03:01:54.083684', 'step': 20319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:54.137496', 'step': 20319, 'epoch': 3} {'type': 'loss', 'content': 0.0731266513466835, 'timestamp': '2025-09-10 03:01:54.143731', 'step': 20320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:54.200046', 'step': 20320, 'epoch': 3} {'type': 'loss', 'content': 0.05121702328324318, 'timestamp': '2025-09-10 03:01:54.202397', 'step': 20321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:54.255129', 'step': 20321, 'epoch': 3} {'type': 'loss', 'content': 0.08845030516386032, 'timestamp': '2025-09-10 03:01:54.257444', 'step': 20322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:54.310523', 'step': 20322, 'epoch': 3} {'type': 'loss', 'content': 0.08711264282464981, 'timestamp': '2025-09-10 03:01:54.312908', 'step': 20323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:54.366570', 'step': 20323, 'epoch': 3} {'type': 'loss', 'content': 0.02107548527419567, 'timestamp': '2025-09-10 03:01:54.372801', 'step': 20324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:54.425288', 'step': 20324, 'epoch': 3} {'type': 'loss', 'content': 0.021089930087327957, 'timestamp': '2025-09-10 03:01:54.427606', 'step': 20325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:54.481684', 'step': 20325, 'epoch': 3} {'type': 'loss', 'content': 0.06851279735565186, 'timestamp': '2025-09-10 03:01:54.484094', 'step': 20326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:54.538395', 'step': 20326, 'epoch': 3} {'type': 'loss', 'content': 0.10339091718196869, 'timestamp': '2025-09-10 03:01:54.540630', 'step': 20327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:54.594833', 'step': 20327, 'epoch': 3} {'type': 'loss', 'content': 0.07781819254159927, 'timestamp': '2025-09-10 03:01:54.601074', 'step': 20328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:54.653879', 'step': 20328, 'epoch': 3} {'type': 'loss', 'content': 0.06645840406417847, 'timestamp': '2025-09-10 03:01:54.656110', 'step': 20329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:54.709030', 'step': 20329, 'epoch': 3} {'type': 'loss', 'content': 0.14306169748306274, 'timestamp': '2025-09-10 03:01:54.711272', 'step': 20330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:54.765749', 'step': 20330, 'epoch': 3} {'type': 'loss', 'content': 0.13271410763263702, 'timestamp': '2025-09-10 03:01:54.768082', 'step': 20331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:01:54.821474', 'step': 20331, 'epoch': 3} {'type': 'loss', 'content': 0.13022463023662567, 'timestamp': '2025-09-10 03:01:54.827464', 'step': 20332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:54.880302', 'step': 20332, 'epoch': 3} {'type': 'loss', 'content': 0.09276394546031952, 'timestamp': '2025-09-10 03:01:54.882654', 'step': 20333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:54.936425', 'step': 20333, 'epoch': 3} {'type': 'loss', 'content': 0.05947593227028847, 'timestamp': '2025-09-10 03:01:54.938786', 'step': 20334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:54.999110', 'step': 20334, 'epoch': 3} {'type': 'loss', 'content': 0.11136066913604736, 'timestamp': '2025-09-10 03:01:55.001463', 'step': 20335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:55.054898', 'step': 20335, 'epoch': 3} {'type': 'loss', 'content': 0.189219668507576, 'timestamp': '2025-09-10 03:01:55.060964', 'step': 20336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:55.113355', 'step': 20336, 'epoch': 3} {'type': 'loss', 'content': 0.09731346368789673, 'timestamp': '2025-09-10 03:01:55.115596', 'step': 20337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:55.168708', 'step': 20337, 'epoch': 3} {'type': 'loss', 'content': 0.13923364877700806, 'timestamp': '2025-09-10 03:01:55.170981', 'step': 20338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:55.224627', 'step': 20338, 'epoch': 3} {'type': 'loss', 'content': 0.065682552754879, 'timestamp': '2025-09-10 03:01:55.226842', 'step': 20339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:55.280284', 'step': 20339, 'epoch': 3} {'type': 'loss', 'content': 0.03368007764220238, 'timestamp': '2025-09-10 03:01:55.286235', 'step': 20340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:55.340093', 'step': 20340, 'epoch': 3} {'type': 'loss', 'content': 0.058996837586164474, 'timestamp': '2025-09-10 03:01:55.342303', 'step': 20341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:55.396005', 'step': 20341, 'epoch': 3} {'type': 'loss', 'content': 0.10563015192747116, 'timestamp': '2025-09-10 03:01:55.398260', 'step': 20342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:55.451249', 'step': 20342, 'epoch': 3} {'type': 'loss', 'content': 0.03386868163943291, 'timestamp': '2025-09-10 03:01:55.453316', 'step': 20343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:55.506764', 'step': 20343, 'epoch': 3} {'type': 'loss', 'content': 0.1101454496383667, 'timestamp': '2025-09-10 03:01:55.512877', 'step': 20344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:55.567182', 'step': 20344, 'epoch': 3} {'type': 'loss', 'content': 0.08449287712574005, 'timestamp': '2025-09-10 03:01:55.569469', 'step': 20345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:55.623336', 'step': 20345, 'epoch': 3} {'type': 'loss', 'content': 0.04528995230793953, 'timestamp': '2025-09-10 03:01:55.625385', 'step': 20346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:55.678929', 'step': 20346, 'epoch': 3} {'type': 'loss', 'content': 0.060623206198215485, 'timestamp': '2025-09-10 03:01:55.681314', 'step': 20347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:55.734171', 'step': 20347, 'epoch': 3} {'type': 'loss', 'content': 0.14104734361171722, 'timestamp': '2025-09-10 03:01:55.740035', 'step': 20348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:55.791979', 'step': 20348, 'epoch': 3} {'type': 'loss', 'content': 0.05763762444257736, 'timestamp': '2025-09-10 03:01:55.794238', 'step': 20349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:55.846785', 'step': 20349, 'epoch': 3} {'type': 'loss', 'content': 0.041415322571992874, 'timestamp': '2025-09-10 03:01:55.849171', 'step': 20350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:55.901755', 'step': 20350, 'epoch': 3} {'type': 'loss', 'content': 0.06436758488416672, 'timestamp': '2025-09-10 03:01:55.904001', 'step': 20351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:55.956720', 'step': 20351, 'epoch': 3} {'type': 'loss', 'content': 0.06429300457239151, 'timestamp': '2025-09-10 03:01:55.962582', 'step': 20352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:56.015258', 'step': 20352, 'epoch': 3} {'type': 'loss', 'content': 0.08722575008869171, 'timestamp': '2025-09-10 03:01:56.017561', 'step': 20353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:56.071305', 'step': 20353, 'epoch': 3} {'type': 'loss', 'content': 0.10355842858552933, 'timestamp': '2025-09-10 03:01:56.073614', 'step': 20354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:56.127577', 'step': 20354, 'epoch': 3} {'type': 'loss', 'content': 0.07323529571294785, 'timestamp': '2025-09-10 03:01:56.129846', 'step': 20355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:56.182786', 'step': 20355, 'epoch': 3} {'type': 'loss', 'content': 0.07355710119009018, 'timestamp': '2025-09-10 03:01:56.188744', 'step': 20356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:56.240988', 'step': 20356, 'epoch': 3} {'type': 'loss', 'content': 0.07764837145805359, 'timestamp': '2025-09-10 03:01:56.243223', 'step': 20357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:56.297231', 'step': 20357, 'epoch': 3} {'type': 'loss', 'content': 0.13232159614562988, 'timestamp': '2025-09-10 03:01:56.299469', 'step': 20358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:56.352441', 'step': 20358, 'epoch': 3} {'type': 'loss', 'content': 0.09737586975097656, 'timestamp': '2025-09-10 03:01:56.358098', 'step': 20359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:56.412917', 'step': 20359, 'epoch': 3} {'type': 'loss', 'content': 0.10018175840377808, 'timestamp': '2025-09-10 03:01:56.418859', 'step': 20360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:56.471595', 'step': 20360, 'epoch': 3} {'type': 'loss', 'content': 0.15482380986213684, 'timestamp': '2025-09-10 03:01:56.473797', 'step': 20361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:56.527054', 'step': 20361, 'epoch': 3} {'type': 'loss', 'content': 0.07742369920015335, 'timestamp': '2025-09-10 03:01:56.533148', 'step': 20362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:56.590386', 'step': 20362, 'epoch': 3} {'type': 'loss', 'content': 0.10260132700204849, 'timestamp': '2025-09-10 03:01:56.592761', 'step': 20363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:56.648177', 'step': 20363, 'epoch': 3} {'type': 'loss', 'content': 0.0687691792845726, 'timestamp': '2025-09-10 03:01:56.654296', 'step': 20364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:56.710179', 'step': 20364, 'epoch': 3} {'type': 'loss', 'content': 0.05669984593987465, 'timestamp': '2025-09-10 03:01:56.712209', 'step': 20365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:56.771517', 'step': 20365, 'epoch': 3} {'type': 'loss', 'content': 0.09647087752819061, 'timestamp': '2025-09-10 03:01:56.774092', 'step': 20366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:56.828807', 'step': 20366, 'epoch': 3} {'type': 'loss', 'content': 0.033397018909454346, 'timestamp': '2025-09-10 03:01:56.831030', 'step': 20367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:56.891427', 'step': 20367, 'epoch': 3} {'type': 'loss', 'content': 0.04936542734503746, 'timestamp': '2025-09-10 03:01:56.897625', 'step': 20368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:56.951671', 'step': 20368, 'epoch': 3} {'type': 'loss', 'content': 0.09017958492040634, 'timestamp': '2025-09-10 03:01:56.953939', 'step': 20369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:57.007524', 'step': 20369, 'epoch': 3} {'type': 'loss', 'content': 0.06238505616784096, 'timestamp': '2025-09-10 03:01:57.009758', 'step': 20370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:57.062775', 'step': 20370, 'epoch': 3} {'type': 'loss', 'content': 0.11506201326847076, 'timestamp': '2025-09-10 03:01:57.065425', 'step': 20371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:57.122261', 'step': 20371, 'epoch': 3} {'type': 'loss', 'content': 0.060461606830358505, 'timestamp': '2025-09-10 03:01:57.129163', 'step': 20372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:01:57.182996', 'step': 20372, 'epoch': 3} {'type': 'loss', 'content': 0.04960925504565239, 'timestamp': '2025-09-10 03:01:57.187580', 'step': 20373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:57.241568', 'step': 20373, 'epoch': 3} {'type': 'loss', 'content': 0.14351852238178253, 'timestamp': '2025-09-10 03:01:57.245084', 'step': 20374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:57.299145', 'step': 20374, 'epoch': 3} {'type': 'loss', 'content': 0.08126424998044968, 'timestamp': '2025-09-10 03:01:57.301471', 'step': 20375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:57.355751', 'step': 20375, 'epoch': 3} {'type': 'loss', 'content': 0.07671110332012177, 'timestamp': '2025-09-10 03:01:57.361728', 'step': 20376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:57.422364', 'step': 20376, 'epoch': 3} {'type': 'loss', 'content': 0.0835067629814148, 'timestamp': '2025-09-10 03:01:57.424489', 'step': 20377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:57.478085', 'step': 20377, 'epoch': 3} {'type': 'loss', 'content': 0.15994347631931305, 'timestamp': '2025-09-10 03:01:57.480160', 'step': 20378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:57.538341', 'step': 20378, 'epoch': 3} {'type': 'loss', 'content': 0.07712150365114212, 'timestamp': '2025-09-10 03:01:57.540597', 'step': 20379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:57.594445', 'step': 20379, 'epoch': 3} {'type': 'loss', 'content': 0.10124791413545609, 'timestamp': '2025-09-10 03:01:57.600428', 'step': 20380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:57.653772', 'step': 20380, 'epoch': 3} {'type': 'loss', 'content': 0.06556953489780426, 'timestamp': '2025-09-10 03:01:57.656051', 'step': 20381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:57.709714', 'step': 20381, 'epoch': 3} {'type': 'loss', 'content': 0.0856160894036293, 'timestamp': '2025-09-10 03:01:57.711967', 'step': 20382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:57.767353', 'step': 20382, 'epoch': 3} {'type': 'loss', 'content': 0.06189459562301636, 'timestamp': '2025-09-10 03:01:57.769620', 'step': 20383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:57.822850', 'step': 20383, 'epoch': 3} {'type': 'loss', 'content': 0.028797751292586327, 'timestamp': '2025-09-10 03:01:57.828817', 'step': 20384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:57.881263', 'step': 20384, 'epoch': 3} {'type': 'loss', 'content': 0.05905459448695183, 'timestamp': '2025-09-10 03:01:57.883564', 'step': 20385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:57.936392', 'step': 20385, 'epoch': 3} {'type': 'loss', 'content': 0.10682885348796844, 'timestamp': '2025-09-10 03:01:57.938652', 'step': 20386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:57.992228', 'step': 20386, 'epoch': 3} {'type': 'loss', 'content': 0.07834381610155106, 'timestamp': '2025-09-10 03:01:57.994525', 'step': 20387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:58.047626', 'step': 20387, 'epoch': 3} {'type': 'loss', 'content': 0.14885292947292328, 'timestamp': '2025-09-10 03:01:58.053464', 'step': 20388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:58.106052', 'step': 20388, 'epoch': 3} {'type': 'loss', 'content': 0.03866483271121979, 'timestamp': '2025-09-10 03:01:58.108303', 'step': 20389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:58.161482', 'step': 20389, 'epoch': 3} {'type': 'loss', 'content': 0.14987187087535858, 'timestamp': '2025-09-10 03:01:58.163720', 'step': 20390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:58.218066', 'step': 20390, 'epoch': 3} {'type': 'loss', 'content': 0.09879377484321594, 'timestamp': '2025-09-10 03:01:58.220563', 'step': 20391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:58.285648', 'step': 20391, 'epoch': 3} {'type': 'loss', 'content': 0.04231187701225281, 'timestamp': '2025-09-10 03:01:58.294565', 'step': 20392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:58.351062', 'step': 20392, 'epoch': 3} {'type': 'loss', 'content': 0.0626426488161087, 'timestamp': '2025-09-10 03:01:58.353379', 'step': 20393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:58.406098', 'step': 20393, 'epoch': 3} {'type': 'loss', 'content': 0.08777748048305511, 'timestamp': '2025-09-10 03:01:58.408351', 'step': 20394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:58.462123', 'step': 20394, 'epoch': 3} {'type': 'loss', 'content': 0.0698680728673935, 'timestamp': '2025-09-10 03:01:58.464399', 'step': 20395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:58.516961', 'step': 20395, 'epoch': 3} {'type': 'loss', 'content': 0.05153030529618263, 'timestamp': '2025-09-10 03:01:58.522897', 'step': 20396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:58.575258', 'step': 20396, 'epoch': 3} {'type': 'loss', 'content': 0.1646125167608261, 'timestamp': '2025-09-10 03:01:58.577409', 'step': 20397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:58.629765', 'step': 20397, 'epoch': 3} {'type': 'loss', 'content': 0.13567481935024261, 'timestamp': '2025-09-10 03:01:58.633201', 'step': 20398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:58.695441', 'step': 20398, 'epoch': 3} {'type': 'loss', 'content': 0.10517765581607819, 'timestamp': '2025-09-10 03:01:58.697709', 'step': 20399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:58.752224', 'step': 20399, 'epoch': 3} {'type': 'loss', 'content': 0.05747523531317711, 'timestamp': '2025-09-10 03:01:58.758243', 'step': 20400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:58.810795', 'step': 20400, 'epoch': 3} {'type': 'loss', 'content': 0.14444807171821594, 'timestamp': '2025-09-10 03:01:58.813033', 'step': 20401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:58.866515', 'step': 20401, 'epoch': 3} {'type': 'loss', 'content': 0.08318792283535004, 'timestamp': '2025-09-10 03:01:58.868804', 'step': 20402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:58.925897', 'step': 20402, 'epoch': 3} {'type': 'loss', 'content': 0.07258056104183197, 'timestamp': '2025-09-10 03:01:58.931451', 'step': 20403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:58.984646', 'step': 20403, 'epoch': 3} {'type': 'loss', 'content': 0.09667117893695831, 'timestamp': '2025-09-10 03:01:58.990568', 'step': 20404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:59.046363', 'step': 20404, 'epoch': 3} {'type': 'loss', 'content': 0.10836537927389145, 'timestamp': '2025-09-10 03:01:59.048715', 'step': 20405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:59.107137', 'step': 20405, 'epoch': 3} {'type': 'loss', 'content': 0.14619779586791992, 'timestamp': '2025-09-10 03:01:59.109622', 'step': 20406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:59.169076', 'step': 20406, 'epoch': 3} {'type': 'loss', 'content': 0.05973680317401886, 'timestamp': '2025-09-10 03:01:59.171321', 'step': 20407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:59.224846', 'step': 20407, 'epoch': 3} {'type': 'loss', 'content': 0.08938740938901901, 'timestamp': '2025-09-10 03:01:59.230811', 'step': 20408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:59.285618', 'step': 20408, 'epoch': 3} {'type': 'loss', 'content': 0.046490807086229324, 'timestamp': '2025-09-10 03:01:59.287895', 'step': 20409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:01:59.341176', 'step': 20409, 'epoch': 3} {'type': 'loss', 'content': 0.12615856528282166, 'timestamp': '2025-09-10 03:01:59.343400', 'step': 20410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:59.398307', 'step': 20410, 'epoch': 3} {'type': 'loss', 'content': 0.034551989287137985, 'timestamp': '2025-09-10 03:01:59.400425', 'step': 20411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:59.453695', 'step': 20411, 'epoch': 3} {'type': 'loss', 'content': 0.06789075583219528, 'timestamp': '2025-09-10 03:01:59.459788', 'step': 20412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:59.512234', 'step': 20412, 'epoch': 3} {'type': 'loss', 'content': 0.08724570274353027, 'timestamp': '2025-09-10 03:01:59.514793', 'step': 20413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:01:59.567513', 'step': 20413, 'epoch': 3} {'type': 'loss', 'content': 0.06812174618244171, 'timestamp': '2025-09-10 03:01:59.570051', 'step': 20414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:59.623738', 'step': 20414, 'epoch': 3} {'type': 'loss', 'content': 0.11767618358135223, 'timestamp': '2025-09-10 03:01:59.626005', 'step': 20415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:59.679205', 'step': 20415, 'epoch': 3} {'type': 'loss', 'content': 0.14930033683776855, 'timestamp': '2025-09-10 03:01:59.685076', 'step': 20416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:59.737359', 'step': 20416, 'epoch': 3} {'type': 'loss', 'content': 0.06167297810316086, 'timestamp': '2025-09-10 03:01:59.739574', 'step': 20417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:01:59.793659', 'step': 20417, 'epoch': 3} {'type': 'loss', 'content': 0.033791400492191315, 'timestamp': '2025-09-10 03:01:59.795892', 'step': 20418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:59.849710', 'step': 20418, 'epoch': 3} {'type': 'loss', 'content': 0.08348400145769119, 'timestamp': '2025-09-10 03:01:59.852056', 'step': 20419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:01:59.907559', 'step': 20419, 'epoch': 3} {'type': 'loss', 'content': 0.10810121893882751, 'timestamp': '2025-09-10 03:01:59.913733', 'step': 20420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:01:59.966932', 'step': 20420, 'epoch': 3} {'type': 'loss', 'content': 0.0508677177131176, 'timestamp': '2025-09-10 03:01:59.969188', 'step': 20421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:00.022991', 'step': 20421, 'epoch': 3} {'type': 'loss', 'content': 0.0989910289645195, 'timestamp': '2025-09-10 03:02:00.025237', 'step': 20422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:00.078878', 'step': 20422, 'epoch': 3} {'type': 'loss', 'content': 0.006636918056756258, 'timestamp': '2025-09-10 03:02:00.081130', 'step': 20423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:00.134876', 'step': 20423, 'epoch': 3} {'type': 'loss', 'content': 0.09749971330165863, 'timestamp': '2025-09-10 03:02:00.140810', 'step': 20424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:02:00.193880', 'step': 20424, 'epoch': 3} {'type': 'loss', 'content': 0.0925959125161171, 'timestamp': '2025-09-10 03:02:00.196211', 'step': 20425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:02:00.249929', 'step': 20425, 'epoch': 3} {'type': 'loss', 'content': 0.10358778387308121, 'timestamp': '2025-09-10 03:02:00.252236', 'step': 20426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:00.306363', 'step': 20426, 'epoch': 3} {'type': 'loss', 'content': 0.12035869061946869, 'timestamp': '2025-09-10 03:02:00.308607', 'step': 20427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:00.363579', 'step': 20427, 'epoch': 3} {'type': 'loss', 'content': 0.0934896245598793, 'timestamp': '2025-09-10 03:02:00.369573', 'step': 20428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:00.422202', 'step': 20428, 'epoch': 3} {'type': 'loss', 'content': 0.08096978813409805, 'timestamp': '2025-09-10 03:02:00.424500', 'step': 20429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:00.488735', 'step': 20429, 'epoch': 3} {'type': 'loss', 'content': 0.06878887861967087, 'timestamp': '2025-09-10 03:02:00.491113', 'step': 20430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:00.544675', 'step': 20430, 'epoch': 3} {'type': 'loss', 'content': 0.11341902613639832, 'timestamp': '2025-09-10 03:02:00.546920', 'step': 20431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:00.600380', 'step': 20431, 'epoch': 3} {'type': 'loss', 'content': 0.03311937674880028, 'timestamp': '2025-09-10 03:02:00.606265', 'step': 20432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:00.659249', 'step': 20432, 'epoch': 3} {'type': 'loss', 'content': 0.07394399493932724, 'timestamp': '2025-09-10 03:02:00.661690', 'step': 20433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:00.715882', 'step': 20433, 'epoch': 3} {'type': 'loss', 'content': 0.030488768592476845, 'timestamp': '2025-09-10 03:02:00.718253', 'step': 20434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:00.771684', 'step': 20434, 'epoch': 3} {'type': 'loss', 'content': 0.07123281806707382, 'timestamp': '2025-09-10 03:02:00.774069', 'step': 20435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:00.826746', 'step': 20435, 'epoch': 3} {'type': 'loss', 'content': 0.024223506450653076, 'timestamp': '2025-09-10 03:02:00.832549', 'step': 20436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:00.886479', 'step': 20436, 'epoch': 3} {'type': 'loss', 'content': 0.08947844058275223, 'timestamp': '2025-09-10 03:02:00.888764', 'step': 20437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:00.941992', 'step': 20437, 'epoch': 3} {'type': 'loss', 'content': 0.09970776736736298, 'timestamp': '2025-09-10 03:02:00.944100', 'step': 20438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:00.997351', 'step': 20438, 'epoch': 3} {'type': 'loss', 'content': 0.10398495197296143, 'timestamp': '2025-09-10 03:02:00.999572', 'step': 20439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:01.052508', 'step': 20439, 'epoch': 3} {'type': 'loss', 'content': 0.051754388958215714, 'timestamp': '2025-09-10 03:02:01.058375', 'step': 20440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:01.111177', 'step': 20440, 'epoch': 3} {'type': 'loss', 'content': 0.07396604865789413, 'timestamp': '2025-09-10 03:02:01.113434', 'step': 20441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:01.166174', 'step': 20441, 'epoch': 3} {'type': 'loss', 'content': 0.04574143514037132, 'timestamp': '2025-09-10 03:02:01.168540', 'step': 20442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:01.223007', 'step': 20442, 'epoch': 3} {'type': 'loss', 'content': 0.06198172643780708, 'timestamp': '2025-09-10 03:02:01.225437', 'step': 20443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:01.279009', 'step': 20443, 'epoch': 3} {'type': 'loss', 'content': 0.06963667273521423, 'timestamp': '2025-09-10 03:02:01.284854', 'step': 20444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:01.337334', 'step': 20444, 'epoch': 3} {'type': 'loss', 'content': 0.03322109580039978, 'timestamp': '2025-09-10 03:02:01.339602', 'step': 20445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 03:02:01.393662', 'step': 20445, 'epoch': 3} {'type': 'loss', 'content': 0.1595829576253891, 'timestamp': '2025-09-10 03:02:01.395890', 'step': 20446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:02:01.450863', 'step': 20446, 'epoch': 3} {'type': 'loss', 'content': 0.08724554628133774, 'timestamp': '2025-09-10 03:02:01.453109', 'step': 20447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:01.506591', 'step': 20447, 'epoch': 3} {'type': 'loss', 'content': 0.10125313699245453, 'timestamp': '2025-09-10 03:02:01.512682', 'step': 20448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:01.565812', 'step': 20448, 'epoch': 3} {'type': 'loss', 'content': 0.054005756974220276, 'timestamp': '2025-09-10 03:02:01.568307', 'step': 20449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:01.623357', 'step': 20449, 'epoch': 3} {'type': 'loss', 'content': 0.030998818576335907, 'timestamp': '2025-09-10 03:02:01.625624', 'step': 20450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:01.680019', 'step': 20450, 'epoch': 3} {'type': 'loss', 'content': 0.09726060926914215, 'timestamp': '2025-09-10 03:02:01.682256', 'step': 20451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:01.736207', 'step': 20451, 'epoch': 3} {'type': 'loss', 'content': 0.15406903624534607, 'timestamp': '2025-09-10 03:02:01.742191', 'step': 20452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:01.794922', 'step': 20452, 'epoch': 3} {'type': 'loss', 'content': 0.10645236819982529, 'timestamp': '2025-09-10 03:02:01.797182', 'step': 20453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:01.850453', 'step': 20453, 'epoch': 3} {'type': 'loss', 'content': 0.18972551822662354, 'timestamp': '2025-09-10 03:02:01.852747', 'step': 20454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 03:02:01.906170', 'step': 20454, 'epoch': 3} {'type': 'loss', 'content': 0.10395018756389618, 'timestamp': '2025-09-10 03:02:01.908410', 'step': 20455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:01.963325', 'step': 20455, 'epoch': 3} {'type': 'loss', 'content': 0.11625273525714874, 'timestamp': '2025-09-10 03:02:01.969399', 'step': 20456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:02.022107', 'step': 20456, 'epoch': 3} {'type': 'loss', 'content': 0.06826486438512802, 'timestamp': '2025-09-10 03:02:02.024439', 'step': 20457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:02.078301', 'step': 20457, 'epoch': 3} {'type': 'loss', 'content': 0.0674004852771759, 'timestamp': '2025-09-10 03:02:02.080509', 'step': 20458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:02:02.134444', 'step': 20458, 'epoch': 3} {'type': 'loss', 'content': 0.1551549881696701, 'timestamp': '2025-09-10 03:02:02.136669', 'step': 20459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:02.189964', 'step': 20459, 'epoch': 3} {'type': 'loss', 'content': 0.08949373662471771, 'timestamp': '2025-09-10 03:02:02.195822', 'step': 20460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:02:02.248934', 'step': 20460, 'epoch': 3} {'type': 'loss', 'content': 0.03551064804196358, 'timestamp': '2025-09-10 03:02:02.251135', 'step': 20461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:02.304620', 'step': 20461, 'epoch': 3} {'type': 'loss', 'content': 0.08579416573047638, 'timestamp': '2025-09-10 03:02:02.306828', 'step': 20462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:02.359678', 'step': 20462, 'epoch': 3} {'type': 'loss', 'content': 0.07958601415157318, 'timestamp': '2025-09-10 03:02:02.361813', 'step': 20463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:02.414926', 'step': 20463, 'epoch': 3} {'type': 'loss', 'content': 0.08735201507806778, 'timestamp': '2025-09-10 03:02:02.420748', 'step': 20464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:02.473539', 'step': 20464, 'epoch': 3} {'type': 'loss', 'content': 0.06470327079296112, 'timestamp': '2025-09-10 03:02:02.475740', 'step': 20465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:02.529612', 'step': 20465, 'epoch': 3} {'type': 'loss', 'content': 0.08024314790964127, 'timestamp': '2025-09-10 03:02:02.531891', 'step': 20466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:02.584812', 'step': 20466, 'epoch': 3} {'type': 'loss', 'content': 0.05075979232788086, 'timestamp': '2025-09-10 03:02:02.587059', 'step': 20467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:02.641434', 'step': 20467, 'epoch': 3} {'type': 'loss', 'content': 0.07255222648382187, 'timestamp': '2025-09-10 03:02:02.647323', 'step': 20468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:02.699395', 'step': 20468, 'epoch': 3} {'type': 'loss', 'content': 0.09623734652996063, 'timestamp': '2025-09-10 03:02:02.701655', 'step': 20469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:02.754682', 'step': 20469, 'epoch': 3} {'type': 'loss', 'content': 0.07990843057632446, 'timestamp': '2025-09-10 03:02:02.756979', 'step': 20470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:02.810790', 'step': 20470, 'epoch': 3} {'type': 'loss', 'content': 0.17356395721435547, 'timestamp': '2025-09-10 03:02:02.813051', 'step': 20471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:02.866303', 'step': 20471, 'epoch': 3} {'type': 'loss', 'content': 0.1677616685628891, 'timestamp': '2025-09-10 03:02:02.872155', 'step': 20472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:02.924673', 'step': 20472, 'epoch': 3} {'type': 'loss', 'content': 0.14470535516738892, 'timestamp': '2025-09-10 03:02:02.926846', 'step': 20473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:02.980641', 'step': 20473, 'epoch': 3} {'type': 'loss', 'content': 0.11879709362983704, 'timestamp': '2025-09-10 03:02:02.982915', 'step': 20474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:03.036660', 'step': 20474, 'epoch': 3} {'type': 'loss', 'content': 0.0687674731016159, 'timestamp': '2025-09-10 03:02:03.038876', 'step': 20475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:03.091765', 'step': 20475, 'epoch': 3} {'type': 'loss', 'content': 0.071222685277462, 'timestamp': '2025-09-10 03:02:03.097950', 'step': 20476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:03.150964', 'step': 20476, 'epoch': 3} {'type': 'loss', 'content': 0.08493243902921677, 'timestamp': '2025-09-10 03:02:03.153326', 'step': 20477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:03.206663', 'step': 20477, 'epoch': 3} {'type': 'loss', 'content': 0.07611225545406342, 'timestamp': '2025-09-10 03:02:03.209026', 'step': 20478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:03.262356', 'step': 20478, 'epoch': 3} {'type': 'loss', 'content': 0.07678159326314926, 'timestamp': '2025-09-10 03:02:03.264666', 'step': 20479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:03.320325', 'step': 20479, 'epoch': 3} {'type': 'loss', 'content': 0.05105934292078018, 'timestamp': '2025-09-10 03:02:03.326175', 'step': 20480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:03.379910', 'step': 20480, 'epoch': 3} {'type': 'loss', 'content': 0.05471992865204811, 'timestamp': '2025-09-10 03:02:03.382125', 'step': 20481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:03.434929', 'step': 20481, 'epoch': 3} {'type': 'loss', 'content': 0.03260478004813194, 'timestamp': '2025-09-10 03:02:03.437182', 'step': 20482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:02:03.490622', 'step': 20482, 'epoch': 3} {'type': 'loss', 'content': 0.08952420949935913, 'timestamp': '2025-09-10 03:02:03.492688', 'step': 20483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 03:02:03.546320', 'step': 20483, 'epoch': 3} {'type': 'loss', 'content': 0.055175259709358215, 'timestamp': '2025-09-10 03:02:03.552221', 'step': 20484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:02:03.606424', 'step': 20484, 'epoch': 3} {'type': 'loss', 'content': 0.08596497774124146, 'timestamp': '2025-09-10 03:02:03.608458', 'step': 20485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:03.661216', 'step': 20485, 'epoch': 3} {'type': 'loss', 'content': 0.2849080264568329, 'timestamp': '2025-09-10 03:02:03.663487', 'step': 20486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:03.716299', 'step': 20486, 'epoch': 3} {'type': 'loss', 'content': 0.09599918872117996, 'timestamp': '2025-09-10 03:02:03.718575', 'step': 20487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:03.771717', 'step': 20487, 'epoch': 3} {'type': 'loss', 'content': 0.07477997988462448, 'timestamp': '2025-09-10 03:02:03.777774', 'step': 20488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:03.830374', 'step': 20488, 'epoch': 3} {'type': 'loss', 'content': 0.09241325408220291, 'timestamp': '2025-09-10 03:02:03.832610', 'step': 20489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:03.885555', 'step': 20489, 'epoch': 3} {'type': 'loss', 'content': 0.09336881339550018, 'timestamp': '2025-09-10 03:02:03.887746', 'step': 20490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:03.940805', 'step': 20490, 'epoch': 3} {'type': 'loss', 'content': 0.060865193605422974, 'timestamp': '2025-09-10 03:02:03.943212', 'step': 20491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:02:03.996940', 'step': 20491, 'epoch': 3} {'type': 'loss', 'content': 0.059776440262794495, 'timestamp': '2025-09-10 03:02:04.002957', 'step': 20492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:04.059703', 'step': 20492, 'epoch': 3} {'type': 'loss', 'content': 0.011823879554867744, 'timestamp': '2025-09-10 03:02:04.061980', 'step': 20493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:04.115805', 'step': 20493, 'epoch': 3} {'type': 'loss', 'content': 0.06949810683727264, 'timestamp': '2025-09-10 03:02:04.118076', 'step': 20494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:02:04.171804', 'step': 20494, 'epoch': 3} {'type': 'loss', 'content': 0.04971298202872276, 'timestamp': '2025-09-10 03:02:04.174051', 'step': 20495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:04.226594', 'step': 20495, 'epoch': 3} {'type': 'loss', 'content': 0.020725997164845467, 'timestamp': '2025-09-10 03:02:04.232559', 'step': 20496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:04.285302', 'step': 20496, 'epoch': 3} {'type': 'loss', 'content': 0.11230551451444626, 'timestamp': '2025-09-10 03:02:04.287537', 'step': 20497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:04.340702', 'step': 20497, 'epoch': 3} {'type': 'loss', 'content': 0.05563977733254433, 'timestamp': '2025-09-10 03:02:04.342940', 'step': 20498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:04.395678', 'step': 20498, 'epoch': 3} {'type': 'loss', 'content': 0.08410055190324783, 'timestamp': '2025-09-10 03:02:04.397860', 'step': 20499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:04.450831', 'step': 20499, 'epoch': 3} {'type': 'loss', 'content': 0.13114461302757263, 'timestamp': '2025-09-10 03:02:04.456625', 'step': 20500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 20500', 'timestamp': '2025-09-10 03:02:04.845254', 'step': 20500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:04.900240', 'step': 20500, 'epoch': 3} {'type': 'loss', 'content': 0.03586669638752937, 'timestamp': '2025-09-10 03:02:04.902624', 'step': 20501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:04.956603', 'step': 20501, 'epoch': 3} {'type': 'loss', 'content': 0.039341870695352554, 'timestamp': '2025-09-10 03:02:04.958935', 'step': 20502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:05.012819', 'step': 20502, 'epoch': 3} {'type': 'loss', 'content': 0.0704561099410057, 'timestamp': '2025-09-10 03:02:05.015115', 'step': 20503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:05.068168', 'step': 20503, 'epoch': 3} {'type': 'loss', 'content': 0.06189176067709923, 'timestamp': '2025-09-10 03:02:05.074476', 'step': 20504, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 03:02:17.763704', 'step': 20504, 'epoch': 3} {'type': 'pplx', 'content': 11753.131071829705, 'timestamp': '2025-09-10 03:02:17.767109', 'step': 20504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:17.820304', 'step': 20504, 'epoch': 3} {'type': 'loss', 'content': 0.1296149045228958, 'timestamp': '2025-09-10 03:02:17.822530', 'step': 20505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:17.876432', 'step': 20505, 'epoch': 3} {'type': 'loss', 'content': 0.022266533225774765, 'timestamp': '2025-09-10 03:02:17.878659', 'step': 20506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:17.931607', 'step': 20506, 'epoch': 3} {'type': 'loss', 'content': 0.04644712433218956, 'timestamp': '2025-09-10 03:02:17.933813', 'step': 20507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:17.987206', 'step': 20507, 'epoch': 3} {'type': 'loss', 'content': 0.06907427310943604, 'timestamp': '2025-09-10 03:02:17.993330', 'step': 20508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:18.045690', 'step': 20508, 'epoch': 3} {'type': 'loss', 'content': 0.09531246870756149, 'timestamp': '2025-09-10 03:02:18.047972', 'step': 20509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:18.101323', 'step': 20509, 'epoch': 3} {'type': 'loss', 'content': 0.07430536299943924, 'timestamp': '2025-09-10 03:02:18.103839', 'step': 20510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:18.156971', 'step': 20510, 'epoch': 3} {'type': 'loss', 'content': 0.10826722532510757, 'timestamp': '2025-09-10 03:02:18.159450', 'step': 20511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:18.213280', 'step': 20511, 'epoch': 3} {'type': 'loss', 'content': 0.04760047793388367, 'timestamp': '2025-09-10 03:02:18.219625', 'step': 20512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:18.272409', 'step': 20512, 'epoch': 3} {'type': 'loss', 'content': 0.06824864447116852, 'timestamp': '2025-09-10 03:02:18.274898', 'step': 20513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:18.327966', 'step': 20513, 'epoch': 3} {'type': 'loss', 'content': 0.08079467713832855, 'timestamp': '2025-09-10 03:02:18.330457', 'step': 20514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:18.383893', 'step': 20514, 'epoch': 3} {'type': 'loss', 'content': 0.09357552230358124, 'timestamp': '2025-09-10 03:02:18.386405', 'step': 20515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:18.441530', 'step': 20515, 'epoch': 3} {'type': 'loss', 'content': 0.07061758637428284, 'timestamp': '2025-09-10 03:02:18.447611', 'step': 20516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:18.500281', 'step': 20516, 'epoch': 3} {'type': 'loss', 'content': 0.23039984703063965, 'timestamp': '2025-09-10 03:02:18.502674', 'step': 20517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:18.555931', 'step': 20517, 'epoch': 3} {'type': 'loss', 'content': 0.0812847688794136, 'timestamp': '2025-09-10 03:02:18.558335', 'step': 20518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:18.612183', 'step': 20518, 'epoch': 3} {'type': 'loss', 'content': 0.09775754809379578, 'timestamp': '2025-09-10 03:02:18.614665', 'step': 20519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:18.667568', 'step': 20519, 'epoch': 3} {'type': 'loss', 'content': 0.06508757919073105, 'timestamp': '2025-09-10 03:02:18.673695', 'step': 20520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:18.727581', 'step': 20520, 'epoch': 3} {'type': 'loss', 'content': 0.11883696168661118, 'timestamp': '2025-09-10 03:02:18.729928', 'step': 20521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:18.784622', 'step': 20521, 'epoch': 3} {'type': 'loss', 'content': 0.0922139585018158, 'timestamp': '2025-09-10 03:02:18.787013', 'step': 20522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:18.840611', 'step': 20522, 'epoch': 3} {'type': 'loss', 'content': 0.08776000142097473, 'timestamp': '2025-09-10 03:02:18.842970', 'step': 20523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:18.896167', 'step': 20523, 'epoch': 3} {'type': 'loss', 'content': 0.05513712763786316, 'timestamp': '2025-09-10 03:02:18.902269', 'step': 20524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:18.955603', 'step': 20524, 'epoch': 3} {'type': 'loss', 'content': 0.09385206550359726, 'timestamp': '2025-09-10 03:02:18.957938', 'step': 20525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:19.010702', 'step': 20525, 'epoch': 3} {'type': 'loss', 'content': 0.03523143753409386, 'timestamp': '2025-09-10 03:02:19.012818', 'step': 20526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:19.065951', 'step': 20526, 'epoch': 3} {'type': 'loss', 'content': 0.07659152150154114, 'timestamp': '2025-09-10 03:02:19.068289', 'step': 20527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:19.120897', 'step': 20527, 'epoch': 3} {'type': 'loss', 'content': 0.14221541583538055, 'timestamp': '2025-09-10 03:02:19.127150', 'step': 20528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:19.179545', 'step': 20528, 'epoch': 3} {'type': 'loss', 'content': 0.02869410254061222, 'timestamp': '2025-09-10 03:02:19.181975', 'step': 20529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:19.236214', 'step': 20529, 'epoch': 3} {'type': 'loss', 'content': 0.039834942668676376, 'timestamp': '2025-09-10 03:02:19.238665', 'step': 20530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:19.292441', 'step': 20530, 'epoch': 3} {'type': 'loss', 'content': 0.06441677361726761, 'timestamp': '2025-09-10 03:02:19.294794', 'step': 20531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:19.347643', 'step': 20531, 'epoch': 3} {'type': 'loss', 'content': 0.06796398758888245, 'timestamp': '2025-09-10 03:02:19.353755', 'step': 20532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:19.406729', 'step': 20532, 'epoch': 3} {'type': 'loss', 'content': 0.12748458981513977, 'timestamp': '2025-09-10 03:02:19.409111', 'step': 20533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:19.462402', 'step': 20533, 'epoch': 3} {'type': 'loss', 'content': 0.08198601007461548, 'timestamp': '2025-09-10 03:02:19.464740', 'step': 20534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:19.518984', 'step': 20534, 'epoch': 3} {'type': 'loss', 'content': 0.08510296791791916, 'timestamp': '2025-09-10 03:02:19.521307', 'step': 20535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:19.574326', 'step': 20535, 'epoch': 3} {'type': 'loss', 'content': 0.041993238031864166, 'timestamp': '2025-09-10 03:02:19.580370', 'step': 20536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:19.633487', 'step': 20536, 'epoch': 3} {'type': 'loss', 'content': 0.11092295497655869, 'timestamp': '2025-09-10 03:02:19.635887', 'step': 20537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:19.690266', 'step': 20537, 'epoch': 3} {'type': 'loss', 'content': 0.11189582943916321, 'timestamp': '2025-09-10 03:02:19.692642', 'step': 20538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:19.745908', 'step': 20538, 'epoch': 3} {'type': 'loss', 'content': 0.06974273175001144, 'timestamp': '2025-09-10 03:02:19.748439', 'step': 20539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:19.803717', 'step': 20539, 'epoch': 3} {'type': 'loss', 'content': 0.13336299359798431, 'timestamp': '2025-09-10 03:02:19.809801', 'step': 20540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:19.863797', 'step': 20540, 'epoch': 3} {'type': 'loss', 'content': 0.10895603895187378, 'timestamp': '2025-09-10 03:02:19.866150', 'step': 20541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:19.919869', 'step': 20541, 'epoch': 3} {'type': 'loss', 'content': 0.09008792042732239, 'timestamp': '2025-09-10 03:02:19.922358', 'step': 20542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:19.975516', 'step': 20542, 'epoch': 3} {'type': 'loss', 'content': 0.08271524310112, 'timestamp': '2025-09-10 03:02:19.977800', 'step': 20543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:20.030912', 'step': 20543, 'epoch': 3} {'type': 'loss', 'content': 0.12098407745361328, 'timestamp': '2025-09-10 03:02:20.037147', 'step': 20544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:20.089510', 'step': 20544, 'epoch': 3} {'type': 'loss', 'content': 0.11819688230752945, 'timestamp': '2025-09-10 03:02:20.091888', 'step': 20545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:20.145908', 'step': 20545, 'epoch': 3} {'type': 'loss', 'content': 0.19224686920642853, 'timestamp': '2025-09-10 03:02:20.148338', 'step': 20546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:20.201740', 'step': 20546, 'epoch': 3} {'type': 'loss', 'content': 0.15696093440055847, 'timestamp': '2025-09-10 03:02:20.204133', 'step': 20547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:20.257379', 'step': 20547, 'epoch': 3} {'type': 'loss', 'content': 0.13105419278144836, 'timestamp': '2025-09-10 03:02:20.263335', 'step': 20548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:20.317511', 'step': 20548, 'epoch': 3} {'type': 'loss', 'content': 0.06186968833208084, 'timestamp': '2025-09-10 03:02:20.319951', 'step': 20549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:20.373944', 'step': 20549, 'epoch': 3} {'type': 'loss', 'content': 0.07943779230117798, 'timestamp': '2025-09-10 03:02:20.376330', 'step': 20550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:20.429705', 'step': 20550, 'epoch': 3} {'type': 'loss', 'content': 0.1166054829955101, 'timestamp': '2025-09-10 03:02:20.432118', 'step': 20551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:20.484977', 'step': 20551, 'epoch': 3} {'type': 'loss', 'content': 0.1271146833896637, 'timestamp': '2025-09-10 03:02:20.491097', 'step': 20552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:20.543636', 'step': 20552, 'epoch': 3} {'type': 'loss', 'content': 0.13856364786624908, 'timestamp': '2025-09-10 03:02:20.546006', 'step': 20553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:20.598908', 'step': 20553, 'epoch': 3} {'type': 'loss', 'content': 0.07365488260984421, 'timestamp': '2025-09-10 03:02:20.601465', 'step': 20554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:20.654382', 'step': 20554, 'epoch': 3} {'type': 'loss', 'content': 0.1730102002620697, 'timestamp': '2025-09-10 03:02:20.656837', 'step': 20555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:20.709532', 'step': 20555, 'epoch': 3} {'type': 'loss', 'content': 0.1036449670791626, 'timestamp': '2025-09-10 03:02:20.715656', 'step': 20556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:20.768024', 'step': 20556, 'epoch': 3} {'type': 'loss', 'content': 0.08826132118701935, 'timestamp': '2025-09-10 03:02:20.770520', 'step': 20557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:02:20.824466', 'step': 20557, 'epoch': 3} {'type': 'loss', 'content': 0.08986936509609222, 'timestamp': '2025-09-10 03:02:20.826901', 'step': 20558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:20.881397', 'step': 20558, 'epoch': 3} {'type': 'loss', 'content': 0.1425107717514038, 'timestamp': '2025-09-10 03:02:20.883493', 'step': 20559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:20.936656', 'step': 20559, 'epoch': 3} {'type': 'loss', 'content': 0.05406823754310608, 'timestamp': '2025-09-10 03:02:20.942756', 'step': 20560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:20.994873', 'step': 20560, 'epoch': 3} {'type': 'loss', 'content': 0.07443338632583618, 'timestamp': '2025-09-10 03:02:20.997186', 'step': 20561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:21.050233', 'step': 20561, 'epoch': 3} {'type': 'loss', 'content': 0.18792253732681274, 'timestamp': '2025-09-10 03:02:21.052554', 'step': 20562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:21.109105', 'step': 20562, 'epoch': 3} {'type': 'loss', 'content': 0.09422434121370316, 'timestamp': '2025-09-10 03:02:21.111408', 'step': 20563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:21.164280', 'step': 20563, 'epoch': 3} {'type': 'loss', 'content': 0.15534508228302002, 'timestamp': '2025-09-10 03:02:21.170278', 'step': 20564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:21.223607', 'step': 20564, 'epoch': 3} {'type': 'loss', 'content': 0.11986926943063736, 'timestamp': '2025-09-10 03:02:21.225914', 'step': 20565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:21.279779', 'step': 20565, 'epoch': 3} {'type': 'loss', 'content': 0.10437516123056412, 'timestamp': '2025-09-10 03:02:21.281987', 'step': 20566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:21.335818', 'step': 20566, 'epoch': 3} {'type': 'loss', 'content': 0.03838159143924713, 'timestamp': '2025-09-10 03:02:21.338059', 'step': 20567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:02:21.392608', 'step': 20567, 'epoch': 3} {'type': 'loss', 'content': 0.10595351457595825, 'timestamp': '2025-09-10 03:02:21.398853', 'step': 20568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:21.451726', 'step': 20568, 'epoch': 3} {'type': 'loss', 'content': 0.10336445271968842, 'timestamp': '2025-09-10 03:02:21.453923', 'step': 20569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:21.507210', 'step': 20569, 'epoch': 3} {'type': 'loss', 'content': 0.08317379653453827, 'timestamp': '2025-09-10 03:02:21.509552', 'step': 20570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:21.562503', 'step': 20570, 'epoch': 3} {'type': 'loss', 'content': 0.0626123771071434, 'timestamp': '2025-09-10 03:02:21.564794', 'step': 20571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:21.617732', 'step': 20571, 'epoch': 3} {'type': 'loss', 'content': 0.016747158020734787, 'timestamp': '2025-09-10 03:02:21.623796', 'step': 20572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:21.675922', 'step': 20572, 'epoch': 3} {'type': 'loss', 'content': 0.06177457049489021, 'timestamp': '2025-09-10 03:02:21.678812', 'step': 20573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:21.731509', 'step': 20573, 'epoch': 3} {'type': 'loss', 'content': 0.10737043619155884, 'timestamp': '2025-09-10 03:02:21.733746', 'step': 20574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:21.787197', 'step': 20574, 'epoch': 3} {'type': 'loss', 'content': 0.12733067572116852, 'timestamp': '2025-09-10 03:02:21.789470', 'step': 20575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:21.842021', 'step': 20575, 'epoch': 3} {'type': 'loss', 'content': 0.12369036674499512, 'timestamp': '2025-09-10 03:02:21.848016', 'step': 20576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:21.902064', 'step': 20576, 'epoch': 3} {'type': 'loss', 'content': 0.08238884806632996, 'timestamp': '2025-09-10 03:02:21.904303', 'step': 20577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:21.957714', 'step': 20577, 'epoch': 3} {'type': 'loss', 'content': 0.19325290620326996, 'timestamp': '2025-09-10 03:02:21.959951', 'step': 20578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:22.012811', 'step': 20578, 'epoch': 3} {'type': 'loss', 'content': 0.048941563814878464, 'timestamp': '2025-09-10 03:02:22.015191', 'step': 20579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:22.068582', 'step': 20579, 'epoch': 3} {'type': 'loss', 'content': 0.07545867562294006, 'timestamp': '2025-09-10 03:02:22.074620', 'step': 20580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:22.127458', 'step': 20580, 'epoch': 3} {'type': 'loss', 'content': 0.08127299696207047, 'timestamp': '2025-09-10 03:02:22.129784', 'step': 20581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:22.182842', 'step': 20581, 'epoch': 3} {'type': 'loss', 'content': 0.10060370713472366, 'timestamp': '2025-09-10 03:02:22.185244', 'step': 20582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:22.239063', 'step': 20582, 'epoch': 3} {'type': 'loss', 'content': 0.06600606441497803, 'timestamp': '2025-09-10 03:02:22.241351', 'step': 20583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:22.294579', 'step': 20583, 'epoch': 3} {'type': 'loss', 'content': 0.04043104872107506, 'timestamp': '2025-09-10 03:02:22.300526', 'step': 20584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:22.353208', 'step': 20584, 'epoch': 3} {'type': 'loss', 'content': 0.04363430663943291, 'timestamp': '2025-09-10 03:02:22.355649', 'step': 20585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:22.408716', 'step': 20585, 'epoch': 3} {'type': 'loss', 'content': 0.039807792752981186, 'timestamp': '2025-09-10 03:02:22.411109', 'step': 20586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:22.464315', 'step': 20586, 'epoch': 3} {'type': 'loss', 'content': 0.03688535839319229, 'timestamp': '2025-09-10 03:02:22.466596', 'step': 20587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:22.520177', 'step': 20587, 'epoch': 3} {'type': 'loss', 'content': 0.0800684466958046, 'timestamp': '2025-09-10 03:02:22.526225', 'step': 20588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:02:22.579249', 'step': 20588, 'epoch': 3} {'type': 'loss', 'content': 0.025696372613310814, 'timestamp': '2025-09-10 03:02:22.581703', 'step': 20589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:22.635061', 'step': 20589, 'epoch': 3} {'type': 'loss', 'content': 0.04331998899579048, 'timestamp': '2025-09-10 03:02:22.637617', 'step': 20590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:22.696977', 'step': 20590, 'epoch': 3} {'type': 'loss', 'content': 0.08998742699623108, 'timestamp': '2025-09-10 03:02:22.699514', 'step': 20591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:22.752083', 'step': 20591, 'epoch': 3} {'type': 'loss', 'content': 0.09140441566705704, 'timestamp': '2025-09-10 03:02:22.758074', 'step': 20592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:02:22.810860', 'step': 20592, 'epoch': 3} {'type': 'loss', 'content': 0.16463081538677216, 'timestamp': '2025-09-10 03:02:22.813187', 'step': 20593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:22.866713', 'step': 20593, 'epoch': 3} {'type': 'loss', 'content': 0.06994959712028503, 'timestamp': '2025-09-10 03:02:22.868991', 'step': 20594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:22.922270', 'step': 20594, 'epoch': 3} {'type': 'loss', 'content': 0.08260753750801086, 'timestamp': '2025-09-10 03:02:22.924550', 'step': 20595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:22.977593', 'step': 20595, 'epoch': 3} {'type': 'loss', 'content': 0.08845513314008713, 'timestamp': '2025-09-10 03:02:22.983730', 'step': 20596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:23.036747', 'step': 20596, 'epoch': 3} {'type': 'loss', 'content': 0.05561186373233795, 'timestamp': '2025-09-10 03:02:23.039055', 'step': 20597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:23.092591', 'step': 20597, 'epoch': 3} {'type': 'loss', 'content': 0.034528523683547974, 'timestamp': '2025-09-10 03:02:23.094855', 'step': 20598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:23.148528', 'step': 20598, 'epoch': 3} {'type': 'loss', 'content': 0.04806149750947952, 'timestamp': '2025-09-10 03:02:23.150980', 'step': 20599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:23.205392', 'step': 20599, 'epoch': 3} {'type': 'loss', 'content': 0.08361786603927612, 'timestamp': '2025-09-10 03:02:23.211540', 'step': 20600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:23.264463', 'step': 20600, 'epoch': 3} {'type': 'loss', 'content': 0.08727936446666718, 'timestamp': '2025-09-10 03:02:23.266871', 'step': 20601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:23.320265', 'step': 20601, 'epoch': 3} {'type': 'loss', 'content': 0.09522908926010132, 'timestamp': '2025-09-10 03:02:23.322644', 'step': 20602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:23.375446', 'step': 20602, 'epoch': 3} {'type': 'loss', 'content': 0.06276516616344452, 'timestamp': '2025-09-10 03:02:23.377704', 'step': 20603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:23.430635', 'step': 20603, 'epoch': 3} {'type': 'loss', 'content': 0.18948683142662048, 'timestamp': '2025-09-10 03:02:23.436510', 'step': 20604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:23.488731', 'step': 20604, 'epoch': 3} {'type': 'loss', 'content': 0.07469054311513901, 'timestamp': '2025-09-10 03:02:23.490860', 'step': 20605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:23.546608', 'step': 20605, 'epoch': 3} {'type': 'loss', 'content': 0.09810545295476913, 'timestamp': '2025-09-10 03:02:23.548819', 'step': 20606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:23.602244', 'step': 20606, 'epoch': 3} {'type': 'loss', 'content': 0.06688975542783737, 'timestamp': '2025-09-10 03:02:23.604353', 'step': 20607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:23.656632', 'step': 20607, 'epoch': 3} {'type': 'loss', 'content': 0.09000929445028305, 'timestamp': '2025-09-10 03:02:23.662680', 'step': 20608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:23.715005', 'step': 20608, 'epoch': 3} {'type': 'loss', 'content': 0.09115134924650192, 'timestamp': '2025-09-10 03:02:23.719678', 'step': 20609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:23.776140', 'step': 20609, 'epoch': 3} {'type': 'loss', 'content': 0.13297943770885468, 'timestamp': '2025-09-10 03:02:23.778403', 'step': 20610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:23.833678', 'step': 20610, 'epoch': 3} {'type': 'loss', 'content': 0.0509469136595726, 'timestamp': '2025-09-10 03:02:23.835967', 'step': 20611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:23.890295', 'step': 20611, 'epoch': 3} {'type': 'loss', 'content': 0.0835486575961113, 'timestamp': '2025-09-10 03:02:23.896208', 'step': 20612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:02:23.952276', 'step': 20612, 'epoch': 3} {'type': 'loss', 'content': 0.03472539409995079, 'timestamp': '2025-09-10 03:02:23.954676', 'step': 20613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:24.007364', 'step': 20613, 'epoch': 3} {'type': 'loss', 'content': 0.13234582543373108, 'timestamp': '2025-09-10 03:02:24.011598', 'step': 20614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:24.067950', 'step': 20614, 'epoch': 3} {'type': 'loss', 'content': 0.11798209697008133, 'timestamp': '2025-09-10 03:02:24.070322', 'step': 20615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:24.123959', 'step': 20615, 'epoch': 3} {'type': 'loss', 'content': 0.140561044216156, 'timestamp': '2025-09-10 03:02:24.129980', 'step': 20616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:24.182434', 'step': 20616, 'epoch': 3} {'type': 'loss', 'content': 0.051399219781160355, 'timestamp': '2025-09-10 03:02:24.186729', 'step': 20617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:24.239979', 'step': 20617, 'epoch': 3} {'type': 'loss', 'content': 0.11177902668714523, 'timestamp': '2025-09-10 03:02:24.242185', 'step': 20618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:24.295453', 'step': 20618, 'epoch': 3} {'type': 'loss', 'content': 0.09197665750980377, 'timestamp': '2025-09-10 03:02:24.297505', 'step': 20619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:24.350551', 'step': 20619, 'epoch': 3} {'type': 'loss', 'content': 0.05192839354276657, 'timestamp': '2025-09-10 03:02:24.356459', 'step': 20620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:24.408238', 'step': 20620, 'epoch': 3} {'type': 'loss', 'content': 0.10677531361579895, 'timestamp': '2025-09-10 03:02:24.410272', 'step': 20621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:24.462760', 'step': 20621, 'epoch': 3} {'type': 'loss', 'content': 0.054275739938020706, 'timestamp': '2025-09-10 03:02:24.464957', 'step': 20622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:24.519036', 'step': 20622, 'epoch': 3} {'type': 'loss', 'content': 0.10397607833147049, 'timestamp': '2025-09-10 03:02:24.521047', 'step': 20623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:24.573567', 'step': 20623, 'epoch': 3} {'type': 'loss', 'content': 0.05983317643404007, 'timestamp': '2025-09-10 03:02:24.579515', 'step': 20624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:24.632996', 'step': 20624, 'epoch': 3} {'type': 'loss', 'content': 0.10488428920507431, 'timestamp': '2025-09-10 03:02:24.635075', 'step': 20625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:24.687849', 'step': 20625, 'epoch': 3} {'type': 'loss', 'content': 0.08142643421888351, 'timestamp': '2025-09-10 03:02:24.689944', 'step': 20626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:02:24.743592', 'step': 20626, 'epoch': 3} {'type': 'loss', 'content': 0.08139302581548691, 'timestamp': '2025-09-10 03:02:24.745991', 'step': 20627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:24.799005', 'step': 20627, 'epoch': 3} {'type': 'loss', 'content': 0.07070238888263702, 'timestamp': '2025-09-10 03:02:24.804800', 'step': 20628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:24.858472', 'step': 20628, 'epoch': 3} {'type': 'loss', 'content': 0.09114561975002289, 'timestamp': '2025-09-10 03:02:24.860822', 'step': 20629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:24.914315', 'step': 20629, 'epoch': 3} {'type': 'loss', 'content': 0.07256270200014114, 'timestamp': '2025-09-10 03:02:24.916453', 'step': 20630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:24.972897', 'step': 20630, 'epoch': 3} {'type': 'loss', 'content': 0.09378468245267868, 'timestamp': '2025-09-10 03:02:24.975187', 'step': 20631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:25.029314', 'step': 20631, 'epoch': 3} {'type': 'loss', 'content': 0.024266669526696205, 'timestamp': '2025-09-10 03:02:25.034886', 'step': 20632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:25.086958', 'step': 20632, 'epoch': 3} {'type': 'loss', 'content': 0.046379588544368744, 'timestamp': '2025-09-10 03:02:25.088941', 'step': 20633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:25.142511', 'step': 20633, 'epoch': 3} {'type': 'loss', 'content': 0.11308848112821579, 'timestamp': '2025-09-10 03:02:25.144725', 'step': 20634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:25.198123', 'step': 20634, 'epoch': 3} {'type': 'loss', 'content': 0.07123904675245285, 'timestamp': '2025-09-10 03:02:25.200288', 'step': 20635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:25.253389', 'step': 20635, 'epoch': 3} {'type': 'loss', 'content': 0.0450684130191803, 'timestamp': '2025-09-10 03:02:25.259196', 'step': 20636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:25.312111', 'step': 20636, 'epoch': 3} {'type': 'loss', 'content': 0.09917756915092468, 'timestamp': '2025-09-10 03:02:25.314404', 'step': 20637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:25.367297', 'step': 20637, 'epoch': 3} {'type': 'loss', 'content': 0.021821951493620872, 'timestamp': '2025-09-10 03:02:25.369543', 'step': 20638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:25.422527', 'step': 20638, 'epoch': 3} {'type': 'loss', 'content': 0.04877333343029022, 'timestamp': '2025-09-10 03:02:25.424581', 'step': 20639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:25.477654', 'step': 20639, 'epoch': 3} {'type': 'loss', 'content': 0.08563774079084396, 'timestamp': '2025-09-10 03:02:25.483289', 'step': 20640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:25.535638', 'step': 20640, 'epoch': 3} {'type': 'loss', 'content': 0.03960488736629486, 'timestamp': '2025-09-10 03:02:25.537918', 'step': 20641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:25.597442', 'step': 20641, 'epoch': 3} {'type': 'loss', 'content': 0.09666929394006729, 'timestamp': '2025-09-10 03:02:25.599659', 'step': 20642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:25.654031', 'step': 20642, 'epoch': 3} {'type': 'loss', 'content': 0.11398304998874664, 'timestamp': '2025-09-10 03:02:25.656301', 'step': 20643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:25.713545', 'step': 20643, 'epoch': 3} {'type': 'loss', 'content': 0.0394580215215683, 'timestamp': '2025-09-10 03:02:25.719730', 'step': 20644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:25.774252', 'step': 20644, 'epoch': 3} {'type': 'loss', 'content': 0.0791800320148468, 'timestamp': '2025-09-10 03:02:25.776415', 'step': 20645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:25.829319', 'step': 20645, 'epoch': 3} {'type': 'loss', 'content': 0.12312155216932297, 'timestamp': '2025-09-10 03:02:25.831349', 'step': 20646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:25.888220', 'step': 20646, 'epoch': 3} {'type': 'loss', 'content': 0.07748162001371384, 'timestamp': '2025-09-10 03:02:25.890584', 'step': 20647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:25.943547', 'step': 20647, 'epoch': 3} {'type': 'loss', 'content': 0.0463101752102375, 'timestamp': '2025-09-10 03:02:25.949505', 'step': 20648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:26.002637', 'step': 20648, 'epoch': 3} {'type': 'loss', 'content': 0.0575614757835865, 'timestamp': '2025-09-10 03:02:26.004882', 'step': 20649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:26.057602', 'step': 20649, 'epoch': 3} {'type': 'loss', 'content': 0.1174832358956337, 'timestamp': '2025-09-10 03:02:26.059675', 'step': 20650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:26.119371', 'step': 20650, 'epoch': 3} {'type': 'loss', 'content': 0.06337303668260574, 'timestamp': '2025-09-10 03:02:26.121642', 'step': 20651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:26.180224', 'step': 20651, 'epoch': 3} {'type': 'loss', 'content': 0.08382903784513474, 'timestamp': '2025-09-10 03:02:26.186458', 'step': 20652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:26.245464', 'step': 20652, 'epoch': 3} {'type': 'loss', 'content': 0.07330961525440216, 'timestamp': '2025-09-10 03:02:26.247299', 'step': 20653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:26.308499', 'step': 20653, 'epoch': 3} {'type': 'loss', 'content': 0.048949290066957474, 'timestamp': '2025-09-10 03:02:26.310881', 'step': 20654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:26.367482', 'step': 20654, 'epoch': 3} {'type': 'loss', 'content': 0.0704348236322403, 'timestamp': '2025-09-10 03:02:26.369576', 'step': 20655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:26.424859', 'step': 20655, 'epoch': 3} {'type': 'loss', 'content': 0.12222124636173248, 'timestamp': '2025-09-10 03:02:26.430920', 'step': 20656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:26.486773', 'step': 20656, 'epoch': 3} {'type': 'loss', 'content': 0.08928235620260239, 'timestamp': '2025-09-10 03:02:26.489180', 'step': 20657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:26.544422', 'step': 20657, 'epoch': 3} {'type': 'loss', 'content': 0.07530413568019867, 'timestamp': '2025-09-10 03:02:26.546841', 'step': 20658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:26.600548', 'step': 20658, 'epoch': 3} {'type': 'loss', 'content': 0.08624418824911118, 'timestamp': '2025-09-10 03:02:26.602788', 'step': 20659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:26.656397', 'step': 20659, 'epoch': 3} {'type': 'loss', 'content': 0.10426782816648483, 'timestamp': '2025-09-10 03:02:26.662290', 'step': 20660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:26.720220', 'step': 20660, 'epoch': 3} {'type': 'loss', 'content': 0.047333601862192154, 'timestamp': '2025-09-10 03:02:26.722480', 'step': 20661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:02:26.776142', 'step': 20661, 'epoch': 3} {'type': 'loss', 'content': 0.07094238698482513, 'timestamp': '2025-09-10 03:02:26.778460', 'step': 20662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:26.831900', 'step': 20662, 'epoch': 3} {'type': 'loss', 'content': 0.08890350908041, 'timestamp': '2025-09-10 03:02:26.833947', 'step': 20663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:26.887541', 'step': 20663, 'epoch': 3} {'type': 'loss', 'content': 0.0822765976190567, 'timestamp': '2025-09-10 03:02:26.893440', 'step': 20664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:26.946975', 'step': 20664, 'epoch': 3} {'type': 'loss', 'content': 0.07489629834890366, 'timestamp': '2025-09-10 03:02:26.948943', 'step': 20665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:27.003591', 'step': 20665, 'epoch': 3} {'type': 'loss', 'content': 0.1056709811091423, 'timestamp': '2025-09-10 03:02:27.005618', 'step': 20666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:27.059316', 'step': 20666, 'epoch': 3} {'type': 'loss', 'content': 0.08294392377138138, 'timestamp': '2025-09-10 03:02:27.061408', 'step': 20667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:27.114712', 'step': 20667, 'epoch': 3} {'type': 'loss', 'content': 0.07990764081478119, 'timestamp': '2025-09-10 03:02:27.120593', 'step': 20668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:27.173798', 'step': 20668, 'epoch': 3} {'type': 'loss', 'content': 0.12881475687026978, 'timestamp': '2025-09-10 03:02:27.176081', 'step': 20669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:27.228558', 'step': 20669, 'epoch': 3} {'type': 'loss', 'content': 0.08235584944486618, 'timestamp': '2025-09-10 03:02:27.230919', 'step': 20670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:27.284031', 'step': 20670, 'epoch': 3} {'type': 'loss', 'content': 0.09373196959495544, 'timestamp': '2025-09-10 03:02:27.286570', 'step': 20671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:27.341684', 'step': 20671, 'epoch': 3} {'type': 'loss', 'content': 0.06478642672300339, 'timestamp': '2025-09-10 03:02:27.347984', 'step': 20672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:27.401449', 'step': 20672, 'epoch': 3} {'type': 'loss', 'content': 0.050349682569503784, 'timestamp': '2025-09-10 03:02:27.403723', 'step': 20673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:27.458071', 'step': 20673, 'epoch': 3} {'type': 'loss', 'content': 0.07695512473583221, 'timestamp': '2025-09-10 03:02:27.460186', 'step': 20674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:27.513441', 'step': 20674, 'epoch': 3} {'type': 'loss', 'content': 0.0654580295085907, 'timestamp': '2025-09-10 03:02:27.515305', 'step': 20675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:27.567710', 'step': 20675, 'epoch': 3} {'type': 'loss', 'content': 0.028164468705654144, 'timestamp': '2025-09-10 03:02:27.573390', 'step': 20676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:27.626688', 'step': 20676, 'epoch': 3} {'type': 'loss', 'content': 0.10057517886161804, 'timestamp': '2025-09-10 03:02:27.628883', 'step': 20677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:27.681847', 'step': 20677, 'epoch': 3} {'type': 'loss', 'content': 0.14397774636745453, 'timestamp': '2025-09-10 03:02:27.685170', 'step': 20678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:27.739262', 'step': 20678, 'epoch': 3} {'type': 'loss', 'content': 0.06159837543964386, 'timestamp': '2025-09-10 03:02:27.741370', 'step': 20679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:27.793985', 'step': 20679, 'epoch': 3} {'type': 'loss', 'content': 0.0981210470199585, 'timestamp': '2025-09-10 03:02:27.800760', 'step': 20680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:27.854095', 'step': 20680, 'epoch': 3} {'type': 'loss', 'content': 0.07839733362197876, 'timestamp': '2025-09-10 03:02:27.856234', 'step': 20681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:27.909603', 'step': 20681, 'epoch': 3} {'type': 'loss', 'content': 0.07800553739070892, 'timestamp': '2025-09-10 03:02:27.911642', 'step': 20682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:27.966183', 'step': 20682, 'epoch': 3} {'type': 'loss', 'content': 0.018711917102336884, 'timestamp': '2025-09-10 03:02:27.968566', 'step': 20683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:28.021458', 'step': 20683, 'epoch': 3} {'type': 'loss', 'content': 0.08967587351799011, 'timestamp': '2025-09-10 03:02:28.027173', 'step': 20684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:28.080086', 'step': 20684, 'epoch': 3} {'type': 'loss', 'content': 0.13475815951824188, 'timestamp': '2025-09-10 03:02:28.082240', 'step': 20685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:02:28.136194', 'step': 20685, 'epoch': 3} {'type': 'loss', 'content': 0.1389351636171341, 'timestamp': '2025-09-10 03:02:28.138587', 'step': 20686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:28.191551', 'step': 20686, 'epoch': 3} {'type': 'loss', 'content': 0.12506787478923798, 'timestamp': '2025-09-10 03:02:28.193991', 'step': 20687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:28.246707', 'step': 20687, 'epoch': 3} {'type': 'loss', 'content': 0.12891210615634918, 'timestamp': '2025-09-10 03:02:28.252548', 'step': 20688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:28.304916', 'step': 20688, 'epoch': 3} {'type': 'loss', 'content': 0.07712692022323608, 'timestamp': '2025-09-10 03:02:28.307176', 'step': 20689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:28.361249', 'step': 20689, 'epoch': 3} {'type': 'loss', 'content': 0.04547335207462311, 'timestamp': '2025-09-10 03:02:28.363490', 'step': 20690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:28.417230', 'step': 20690, 'epoch': 3} {'type': 'loss', 'content': 0.15747718513011932, 'timestamp': '2025-09-10 03:02:28.419317', 'step': 20691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:28.473226', 'step': 20691, 'epoch': 3} {'type': 'loss', 'content': 0.21763651072978973, 'timestamp': '2025-09-10 03:02:28.478979', 'step': 20692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:28.532124', 'step': 20692, 'epoch': 3} {'type': 'loss', 'content': 0.060461901128292084, 'timestamp': '2025-09-10 03:02:28.534278', 'step': 20693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:28.587989', 'step': 20693, 'epoch': 3} {'type': 'loss', 'content': 0.03686622157692909, 'timestamp': '2025-09-10 03:02:28.590503', 'step': 20694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:28.644293', 'step': 20694, 'epoch': 3} {'type': 'loss', 'content': 0.06469234079122543, 'timestamp': '2025-09-10 03:02:28.646355', 'step': 20695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:28.699657', 'step': 20695, 'epoch': 3} {'type': 'loss', 'content': 0.07417842000722885, 'timestamp': '2025-09-10 03:02:28.705716', 'step': 20696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:28.758317', 'step': 20696, 'epoch': 3} {'type': 'loss', 'content': 0.10862202942371368, 'timestamp': '2025-09-10 03:02:28.760348', 'step': 20697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:28.813036', 'step': 20697, 'epoch': 3} {'type': 'loss', 'content': 0.12641353905200958, 'timestamp': '2025-09-10 03:02:28.815190', 'step': 20698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:28.867630', 'step': 20698, 'epoch': 3} {'type': 'loss', 'content': 0.11216259747743607, 'timestamp': '2025-09-10 03:02:28.869949', 'step': 20699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:28.923295', 'step': 20699, 'epoch': 3} {'type': 'loss', 'content': 0.10080377757549286, 'timestamp': '2025-09-10 03:02:28.929251', 'step': 20700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:28.981733', 'step': 20700, 'epoch': 3} {'type': 'loss', 'content': 0.11711717396974564, 'timestamp': '2025-09-10 03:02:28.983764', 'step': 20701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:29.036391', 'step': 20701, 'epoch': 3} {'type': 'loss', 'content': 0.04748806729912758, 'timestamp': '2025-09-10 03:02:29.038206', 'step': 20702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:29.090610', 'step': 20702, 'epoch': 3} {'type': 'loss', 'content': 0.10653987526893616, 'timestamp': '2025-09-10 03:02:29.092474', 'step': 20703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:29.145644', 'step': 20703, 'epoch': 3} {'type': 'loss', 'content': 0.04967094585299492, 'timestamp': '2025-09-10 03:02:29.151109', 'step': 20704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:29.203290', 'step': 20704, 'epoch': 3} {'type': 'loss', 'content': 0.12040895968675613, 'timestamp': '2025-09-10 03:02:29.205556', 'step': 20705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:29.262122', 'step': 20705, 'epoch': 3} {'type': 'loss', 'content': 0.11524073034524918, 'timestamp': '2025-09-10 03:02:29.264299', 'step': 20706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:29.319617', 'step': 20706, 'epoch': 3} {'type': 'loss', 'content': 0.11933787167072296, 'timestamp': '2025-09-10 03:02:29.321647', 'step': 20707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:29.373844', 'step': 20707, 'epoch': 3} {'type': 'loss', 'content': 0.04846072569489479, 'timestamp': '2025-09-10 03:02:29.379828', 'step': 20708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:29.432200', 'step': 20708, 'epoch': 3} {'type': 'loss', 'content': 0.07892332971096039, 'timestamp': '2025-09-10 03:02:29.434518', 'step': 20709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:29.488230', 'step': 20709, 'epoch': 3} {'type': 'loss', 'content': 0.1014840230345726, 'timestamp': '2025-09-10 03:02:29.490134', 'step': 20710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:29.543932', 'step': 20710, 'epoch': 3} {'type': 'loss', 'content': 0.07827629148960114, 'timestamp': '2025-09-10 03:02:29.546192', 'step': 20711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:29.599000', 'step': 20711, 'epoch': 3} {'type': 'loss', 'content': 0.0730283260345459, 'timestamp': '2025-09-10 03:02:29.605302', 'step': 20712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:29.658403', 'step': 20712, 'epoch': 3} {'type': 'loss', 'content': 0.05566621944308281, 'timestamp': '2025-09-10 03:02:29.660379', 'step': 20713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:29.714142', 'step': 20713, 'epoch': 3} {'type': 'loss', 'content': 0.11193661391735077, 'timestamp': '2025-09-10 03:02:29.716826', 'step': 20714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:29.770736', 'step': 20714, 'epoch': 3} {'type': 'loss', 'content': 0.06688541173934937, 'timestamp': '2025-09-10 03:02:29.773453', 'step': 20715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:29.826423', 'step': 20715, 'epoch': 3} {'type': 'loss', 'content': 0.09515416622161865, 'timestamp': '2025-09-10 03:02:29.832072', 'step': 20716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:29.886970', 'step': 20716, 'epoch': 3} {'type': 'loss', 'content': 0.10774116963148117, 'timestamp': '2025-09-10 03:02:29.889462', 'step': 20717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:29.943343', 'step': 20717, 'epoch': 3} {'type': 'loss', 'content': 0.008918787352740765, 'timestamp': '2025-09-10 03:02:29.945367', 'step': 20718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:29.999398', 'step': 20718, 'epoch': 3} {'type': 'loss', 'content': 0.05912100151181221, 'timestamp': '2025-09-10 03:02:30.001713', 'step': 20719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:30.054930', 'step': 20719, 'epoch': 3} {'type': 'loss', 'content': 0.03409452363848686, 'timestamp': '2025-09-10 03:02:30.060571', 'step': 20720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:30.113770', 'step': 20720, 'epoch': 3} {'type': 'loss', 'content': 0.16860061883926392, 'timestamp': '2025-09-10 03:02:30.115617', 'step': 20721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:30.168791', 'step': 20721, 'epoch': 3} {'type': 'loss', 'content': 0.16891899704933167, 'timestamp': '2025-09-10 03:02:30.170719', 'step': 20722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:30.222997', 'step': 20722, 'epoch': 3} {'type': 'loss', 'content': 0.012709659524261951, 'timestamp': '2025-09-10 03:02:30.224947', 'step': 20723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:30.277674', 'step': 20723, 'epoch': 3} {'type': 'loss', 'content': 0.04440268129110336, 'timestamp': '2025-09-10 03:02:30.283309', 'step': 20724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:30.335659', 'step': 20724, 'epoch': 3} {'type': 'loss', 'content': 0.08498883247375488, 'timestamp': '2025-09-10 03:02:30.337709', 'step': 20725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:30.390651', 'step': 20725, 'epoch': 3} {'type': 'loss', 'content': 0.14490839838981628, 'timestamp': '2025-09-10 03:02:30.393029', 'step': 20726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:30.445555', 'step': 20726, 'epoch': 3} {'type': 'loss', 'content': 0.07326138764619827, 'timestamp': '2025-09-10 03:02:30.447777', 'step': 20727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:30.501116', 'step': 20727, 'epoch': 3} {'type': 'loss', 'content': 0.09129346162080765, 'timestamp': '2025-09-10 03:02:30.506970', 'step': 20728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:30.560421', 'step': 20728, 'epoch': 3} {'type': 'loss', 'content': 0.1393677443265915, 'timestamp': '2025-09-10 03:02:30.562408', 'step': 20729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:30.615450', 'step': 20729, 'epoch': 3} {'type': 'loss', 'content': 0.07676707208156586, 'timestamp': '2025-09-10 03:02:30.617471', 'step': 20730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:30.671000', 'step': 20730, 'epoch': 3} {'type': 'loss', 'content': 0.048674922436475754, 'timestamp': '2025-09-10 03:02:30.673122', 'step': 20731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:30.728385', 'step': 20731, 'epoch': 3} {'type': 'loss', 'content': 0.06765735894441605, 'timestamp': '2025-09-10 03:02:30.734109', 'step': 20732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:30.786609', 'step': 20732, 'epoch': 3} {'type': 'loss', 'content': 0.13467857241630554, 'timestamp': '2025-09-10 03:02:30.788877', 'step': 20733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:30.841515', 'step': 20733, 'epoch': 3} {'type': 'loss', 'content': 0.09955662488937378, 'timestamp': '2025-09-10 03:02:30.843565', 'step': 20734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:30.897006', 'step': 20734, 'epoch': 3} {'type': 'loss', 'content': 0.09349626302719116, 'timestamp': '2025-09-10 03:02:30.899062', 'step': 20735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:30.951920', 'step': 20735, 'epoch': 3} {'type': 'loss', 'content': 0.08254088461399078, 'timestamp': '2025-09-10 03:02:30.957567', 'step': 20736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:31.009712', 'step': 20736, 'epoch': 3} {'type': 'loss', 'content': 0.04978911578655243, 'timestamp': '2025-09-10 03:02:31.011500', 'step': 20737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:31.065398', 'step': 20737, 'epoch': 3} {'type': 'loss', 'content': 0.10715027153491974, 'timestamp': '2025-09-10 03:02:31.067292', 'step': 20738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:31.120806', 'step': 20738, 'epoch': 3} {'type': 'loss', 'content': 0.018457721918821335, 'timestamp': '2025-09-10 03:02:31.122938', 'step': 20739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:31.177657', 'step': 20739, 'epoch': 3} {'type': 'loss', 'content': 0.129865825176239, 'timestamp': '2025-09-10 03:02:31.183713', 'step': 20740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:31.236547', 'step': 20740, 'epoch': 3} {'type': 'loss', 'content': 0.06856118142604828, 'timestamp': '2025-09-10 03:02:31.238494', 'step': 20741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:31.291798', 'step': 20741, 'epoch': 3} {'type': 'loss', 'content': 0.15850937366485596, 'timestamp': '2025-09-10 03:02:31.293909', 'step': 20742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:31.347121', 'step': 20742, 'epoch': 3} {'type': 'loss', 'content': 0.07210499048233032, 'timestamp': '2025-09-10 03:02:31.349525', 'step': 20743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:31.402535', 'step': 20743, 'epoch': 3} {'type': 'loss', 'content': 0.06804905831813812, 'timestamp': '2025-09-10 03:02:31.408110', 'step': 20744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:31.460463', 'step': 20744, 'epoch': 3} {'type': 'loss', 'content': 0.08294598013162613, 'timestamp': '2025-09-10 03:02:31.462401', 'step': 20745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:31.515059', 'step': 20745, 'epoch': 3} {'type': 'loss', 'content': 0.1454886645078659, 'timestamp': '2025-09-10 03:02:31.517066', 'step': 20746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:31.570681', 'step': 20746, 'epoch': 3} {'type': 'loss', 'content': 0.0683511272072792, 'timestamp': '2025-09-10 03:02:31.572781', 'step': 20747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:31.626464', 'step': 20747, 'epoch': 3} {'type': 'loss', 'content': 0.03754688799381256, 'timestamp': '2025-09-10 03:02:31.632371', 'step': 20748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:31.685453', 'step': 20748, 'epoch': 3} {'type': 'loss', 'content': 0.05587833374738693, 'timestamp': '2025-09-10 03:02:31.687731', 'step': 20749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:31.740542', 'step': 20749, 'epoch': 3} {'type': 'loss', 'content': 0.10421823710203171, 'timestamp': '2025-09-10 03:02:31.742827', 'step': 20750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:31.795378', 'step': 20750, 'epoch': 3} {'type': 'loss', 'content': 0.10612265020608902, 'timestamp': '2025-09-10 03:02:31.797673', 'step': 20751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:31.850982', 'step': 20751, 'epoch': 3} {'type': 'loss', 'content': 0.06137019395828247, 'timestamp': '2025-09-10 03:02:31.857046', 'step': 20752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 03:02:31.910025', 'step': 20752, 'epoch': 3} {'type': 'loss', 'content': 0.10482994467020035, 'timestamp': '2025-09-10 03:02:31.911970', 'step': 20753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:31.964434', 'step': 20753, 'epoch': 3} {'type': 'loss', 'content': 0.07775284349918365, 'timestamp': '2025-09-10 03:02:31.966334', 'step': 20754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:32.019093', 'step': 20754, 'epoch': 3} {'type': 'loss', 'content': 0.027019592002034187, 'timestamp': '2025-09-10 03:02:32.021061', 'step': 20755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:32.073281', 'step': 20755, 'epoch': 3} {'type': 'loss', 'content': 0.08676236122846603, 'timestamp': '2025-09-10 03:02:32.078699', 'step': 20756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:32.131872', 'step': 20756, 'epoch': 3} {'type': 'loss', 'content': 0.07007218152284622, 'timestamp': '2025-09-10 03:02:32.134127', 'step': 20757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:32.187140', 'step': 20757, 'epoch': 3} {'type': 'loss', 'content': 0.08139601349830627, 'timestamp': '2025-09-10 03:02:32.189572', 'step': 20758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:32.242841', 'step': 20758, 'epoch': 3} {'type': 'loss', 'content': 0.055079273879528046, 'timestamp': '2025-09-10 03:02:32.244726', 'step': 20759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:32.297530', 'step': 20759, 'epoch': 3} {'type': 'loss', 'content': 0.08594191819429398, 'timestamp': '2025-09-10 03:02:32.303149', 'step': 20760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:32.357226', 'step': 20760, 'epoch': 3} {'type': 'loss', 'content': 0.07302607595920563, 'timestamp': '2025-09-10 03:02:32.359364', 'step': 20761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:32.411867', 'step': 20761, 'epoch': 3} {'type': 'loss', 'content': 0.1347551941871643, 'timestamp': '2025-09-10 03:02:32.414118', 'step': 20762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:32.467064', 'step': 20762, 'epoch': 3} {'type': 'loss', 'content': 0.056740861386060715, 'timestamp': '2025-09-10 03:02:32.469119', 'step': 20763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:32.522285', 'step': 20763, 'epoch': 3} {'type': 'loss', 'content': 0.079868383705616, 'timestamp': '2025-09-10 03:02:32.528031', 'step': 20764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:32.579884', 'step': 20764, 'epoch': 3} {'type': 'loss', 'content': 0.15151140093803406, 'timestamp': '2025-09-10 03:02:32.581961', 'step': 20765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:32.635323', 'step': 20765, 'epoch': 3} {'type': 'loss', 'content': 0.09635934978723526, 'timestamp': '2025-09-10 03:02:32.637433', 'step': 20766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:32.690773', 'step': 20766, 'epoch': 3} {'type': 'loss', 'content': 0.11628325283527374, 'timestamp': '2025-09-10 03:02:32.692979', 'step': 20767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:32.746135', 'step': 20767, 'epoch': 3} {'type': 'loss', 'content': 0.07813084125518799, 'timestamp': '2025-09-10 03:02:32.751851', 'step': 20768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:32.805757', 'step': 20768, 'epoch': 3} {'type': 'loss', 'content': 0.08621320873498917, 'timestamp': '2025-09-10 03:02:32.807821', 'step': 20769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:32.861162', 'step': 20769, 'epoch': 3} {'type': 'loss', 'content': 0.1770007312297821, 'timestamp': '2025-09-10 03:02:32.863377', 'step': 20770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:32.917105', 'step': 20770, 'epoch': 3} {'type': 'loss', 'content': 0.11098866164684296, 'timestamp': '2025-09-10 03:02:32.919184', 'step': 20771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:32.972094', 'step': 20771, 'epoch': 3} {'type': 'loss', 'content': 0.08822616934776306, 'timestamp': '2025-09-10 03:02:32.977741', 'step': 20772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:33.031305', 'step': 20772, 'epoch': 3} {'type': 'loss', 'content': 0.10730784386396408, 'timestamp': '2025-09-10 03:02:33.033405', 'step': 20773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:33.086507', 'step': 20773, 'epoch': 3} {'type': 'loss', 'content': 0.03211958706378937, 'timestamp': '2025-09-10 03:02:33.088730', 'step': 20774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:33.141767', 'step': 20774, 'epoch': 3} {'type': 'loss', 'content': 0.12518030405044556, 'timestamp': '2025-09-10 03:02:33.143935', 'step': 20775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:33.198077', 'step': 20775, 'epoch': 3} {'type': 'loss', 'content': 0.07391313463449478, 'timestamp': '2025-09-10 03:02:33.204168', 'step': 20776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:33.257285', 'step': 20776, 'epoch': 3} {'type': 'loss', 'content': 0.08541665971279144, 'timestamp': '2025-09-10 03:02:33.259325', 'step': 20777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:33.312978', 'step': 20777, 'epoch': 3} {'type': 'loss', 'content': 0.1006382554769516, 'timestamp': '2025-09-10 03:02:33.315360', 'step': 20778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:33.368202', 'step': 20778, 'epoch': 3} {'type': 'loss', 'content': 0.09159586578607559, 'timestamp': '2025-09-10 03:02:33.370184', 'step': 20779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:33.422877', 'step': 20779, 'epoch': 3} {'type': 'loss', 'content': 0.16974611580371857, 'timestamp': '2025-09-10 03:02:33.428707', 'step': 20780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:33.480465', 'step': 20780, 'epoch': 3} {'type': 'loss', 'content': 0.09710731357336044, 'timestamp': '2025-09-10 03:02:33.482400', 'step': 20781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:33.536793', 'step': 20781, 'epoch': 3} {'type': 'loss', 'content': 0.08601288497447968, 'timestamp': '2025-09-10 03:02:33.538705', 'step': 20782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:33.591361', 'step': 20782, 'epoch': 3} {'type': 'loss', 'content': 0.05754069611430168, 'timestamp': '2025-09-10 03:02:33.593292', 'step': 20783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:33.646604', 'step': 20783, 'epoch': 3} {'type': 'loss', 'content': 0.04374191164970398, 'timestamp': '2025-09-10 03:02:33.652249', 'step': 20784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:33.705899', 'step': 20784, 'epoch': 3} {'type': 'loss', 'content': 0.1163947731256485, 'timestamp': '2025-09-10 03:02:33.707858', 'step': 20785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:33.761584', 'step': 20785, 'epoch': 3} {'type': 'loss', 'content': 0.08344508707523346, 'timestamp': '2025-09-10 03:02:33.763847', 'step': 20786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:33.817090', 'step': 20786, 'epoch': 3} {'type': 'loss', 'content': 0.07606615871191025, 'timestamp': '2025-09-10 03:02:33.819583', 'step': 20787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:33.872183', 'step': 20787, 'epoch': 3} {'type': 'loss', 'content': 0.11624504625797272, 'timestamp': '2025-09-10 03:02:33.878279', 'step': 20788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:33.931243', 'step': 20788, 'epoch': 3} {'type': 'loss', 'content': 0.09868407994508743, 'timestamp': '2025-09-10 03:02:33.933749', 'step': 20789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:33.987234', 'step': 20789, 'epoch': 3} {'type': 'loss', 'content': 0.16187235713005066, 'timestamp': '2025-09-10 03:02:33.990844', 'step': 20790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:34.046909', 'step': 20790, 'epoch': 3} {'type': 'loss', 'content': 0.06685841083526611, 'timestamp': '2025-09-10 03:02:34.049402', 'step': 20791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:34.103634', 'step': 20791, 'epoch': 3} {'type': 'loss', 'content': 0.07616929709911346, 'timestamp': '2025-09-10 03:02:34.109776', 'step': 20792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:34.166791', 'step': 20792, 'epoch': 3} {'type': 'loss', 'content': 0.10936609655618668, 'timestamp': '2025-09-10 03:02:34.169988', 'step': 20793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:34.225325', 'step': 20793, 'epoch': 3} {'type': 'loss', 'content': 0.025356901809573174, 'timestamp': '2025-09-10 03:02:34.227706', 'step': 20794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:34.284603', 'step': 20794, 'epoch': 3} {'type': 'loss', 'content': 0.08835994452238083, 'timestamp': '2025-09-10 03:02:34.286900', 'step': 20795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:34.341859', 'step': 20795, 'epoch': 3} {'type': 'loss', 'content': 0.0793117880821228, 'timestamp': '2025-09-10 03:02:34.348027', 'step': 20796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:34.412937', 'step': 20796, 'epoch': 3} {'type': 'loss', 'content': 0.07049247622489929, 'timestamp': '2025-09-10 03:02:34.415512', 'step': 20797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:34.475949', 'step': 20797, 'epoch': 3} {'type': 'loss', 'content': 0.06605113297700882, 'timestamp': '2025-09-10 03:02:34.478391', 'step': 20798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:34.534772', 'step': 20798, 'epoch': 3} {'type': 'loss', 'content': 0.1404978185892105, 'timestamp': '2025-09-10 03:02:34.537086', 'step': 20799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:34.596362', 'step': 20799, 'epoch': 3} {'type': 'loss', 'content': 0.09596995264291763, 'timestamp': '2025-09-10 03:02:34.602550', 'step': 20800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:34.660714', 'step': 20800, 'epoch': 3} {'type': 'loss', 'content': 0.17188186943531036, 'timestamp': '2025-09-10 03:02:34.663158', 'step': 20801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:34.718893', 'step': 20801, 'epoch': 3} {'type': 'loss', 'content': 0.10101860016584396, 'timestamp': '2025-09-10 03:02:34.721171', 'step': 20802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:34.776672', 'step': 20802, 'epoch': 3} {'type': 'loss', 'content': 0.08157750964164734, 'timestamp': '2025-09-10 03:02:34.779005', 'step': 20803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:34.835917', 'step': 20803, 'epoch': 3} {'type': 'loss', 'content': 0.04712146148085594, 'timestamp': '2025-09-10 03:02:34.842212', 'step': 20804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:34.894811', 'step': 20804, 'epoch': 3} {'type': 'loss', 'content': 0.03660234436392784, 'timestamp': '2025-09-10 03:02:34.897152', 'step': 20805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:34.955458', 'step': 20805, 'epoch': 3} {'type': 'loss', 'content': 0.163942351937294, 'timestamp': '2025-09-10 03:02:34.957778', 'step': 20806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:35.011209', 'step': 20806, 'epoch': 3} {'type': 'loss', 'content': 0.05688117444515228, 'timestamp': '2025-09-10 03:02:35.013308', 'step': 20807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:35.066912', 'step': 20807, 'epoch': 3} {'type': 'loss', 'content': 0.09236801415681839, 'timestamp': '2025-09-10 03:02:35.073086', 'step': 20808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:35.125950', 'step': 20808, 'epoch': 3} {'type': 'loss', 'content': 0.21657726168632507, 'timestamp': '2025-09-10 03:02:35.128360', 'step': 20809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:35.182921', 'step': 20809, 'epoch': 3} {'type': 'loss', 'content': 0.07632569968700409, 'timestamp': '2025-09-10 03:02:35.185229', 'step': 20810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:35.239131', 'step': 20810, 'epoch': 3} {'type': 'loss', 'content': 0.10981088131666183, 'timestamp': '2025-09-10 03:02:35.241316', 'step': 20811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:35.295196', 'step': 20811, 'epoch': 3} {'type': 'loss', 'content': 0.07114674150943756, 'timestamp': '2025-09-10 03:02:35.301485', 'step': 20812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:35.354728', 'step': 20812, 'epoch': 3} {'type': 'loss', 'content': 0.05395910516381264, 'timestamp': '2025-09-10 03:02:35.357012', 'step': 20813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:35.409810', 'step': 20813, 'epoch': 3} {'type': 'loss', 'content': 0.04707862809300423, 'timestamp': '2025-09-10 03:02:35.412129', 'step': 20814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:35.465231', 'step': 20814, 'epoch': 3} {'type': 'loss', 'content': 0.09551940113306046, 'timestamp': '2025-09-10 03:02:35.467738', 'step': 20815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:35.520953', 'step': 20815, 'epoch': 3} {'type': 'loss', 'content': 0.10849324613809586, 'timestamp': '2025-09-10 03:02:35.527167', 'step': 20816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:35.582101', 'step': 20816, 'epoch': 3} {'type': 'loss', 'content': 0.12255297601222992, 'timestamp': '2025-09-10 03:02:35.584454', 'step': 20817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:35.637681', 'step': 20817, 'epoch': 3} {'type': 'loss', 'content': 0.0942668691277504, 'timestamp': '2025-09-10 03:02:35.639970', 'step': 20818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:35.693015', 'step': 20818, 'epoch': 3} {'type': 'loss', 'content': 0.0344926081597805, 'timestamp': '2025-09-10 03:02:35.695415', 'step': 20819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:35.752151', 'step': 20819, 'epoch': 3} {'type': 'loss', 'content': 0.12125355750322342, 'timestamp': '2025-09-10 03:02:35.758116', 'step': 20820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:35.810897', 'step': 20820, 'epoch': 3} {'type': 'loss', 'content': 0.09728094935417175, 'timestamp': '2025-09-10 03:02:35.813239', 'step': 20821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:35.867953', 'step': 20821, 'epoch': 3} {'type': 'loss', 'content': 0.03798860311508179, 'timestamp': '2025-09-10 03:02:35.870361', 'step': 20822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:35.924000', 'step': 20822, 'epoch': 3} {'type': 'loss', 'content': 0.05110274255275726, 'timestamp': '2025-09-10 03:02:35.926320', 'step': 20823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:02:35.989006', 'step': 20823, 'epoch': 3} {'type': 'loss', 'content': 0.15789885818958282, 'timestamp': '2025-09-10 03:02:35.995261', 'step': 20824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:36.048680', 'step': 20824, 'epoch': 3} {'type': 'loss', 'content': 0.08741872012615204, 'timestamp': '2025-09-10 03:02:36.050992', 'step': 20825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:36.104103', 'step': 20825, 'epoch': 3} {'type': 'loss', 'content': 0.11768557876348495, 'timestamp': '2025-09-10 03:02:36.108728', 'step': 20826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:36.170670', 'step': 20826, 'epoch': 3} {'type': 'loss', 'content': 0.08233074098825455, 'timestamp': '2025-09-10 03:02:36.172981', 'step': 20827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:36.226772', 'step': 20827, 'epoch': 3} {'type': 'loss', 'content': 0.12828245759010315, 'timestamp': '2025-09-10 03:02:36.232759', 'step': 20828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:36.286219', 'step': 20828, 'epoch': 3} {'type': 'loss', 'content': 0.14734411239624023, 'timestamp': '2025-09-10 03:02:36.288629', 'step': 20829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:36.342955', 'step': 20829, 'epoch': 3} {'type': 'loss', 'content': 0.10992413014173508, 'timestamp': '2025-09-10 03:02:36.345528', 'step': 20830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:36.401514', 'step': 20830, 'epoch': 3} {'type': 'loss', 'content': 0.08321738243103027, 'timestamp': '2025-09-10 03:02:36.403843', 'step': 20831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:36.458871', 'step': 20831, 'epoch': 3} {'type': 'loss', 'content': 0.10170300304889679, 'timestamp': '2025-09-10 03:02:36.466424', 'step': 20832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:36.521812', 'step': 20832, 'epoch': 3} {'type': 'loss', 'content': 0.07947053760290146, 'timestamp': '2025-09-10 03:02:36.526073', 'step': 20833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:36.589243', 'step': 20833, 'epoch': 3} {'type': 'loss', 'content': 0.19827359914779663, 'timestamp': '2025-09-10 03:02:36.591515', 'step': 20834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:36.645344', 'step': 20834, 'epoch': 3} {'type': 'loss', 'content': 0.12663276493549347, 'timestamp': '2025-09-10 03:02:36.647685', 'step': 20835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:36.700946', 'step': 20835, 'epoch': 3} {'type': 'loss', 'content': 0.08573275059461594, 'timestamp': '2025-09-10 03:02:36.707615', 'step': 20836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:36.761920', 'step': 20836, 'epoch': 3} {'type': 'loss', 'content': 0.07478167116641998, 'timestamp': '2025-09-10 03:02:36.764290', 'step': 20837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:36.817682', 'step': 20837, 'epoch': 3} {'type': 'loss', 'content': 0.0214790478348732, 'timestamp': '2025-09-10 03:02:36.820033', 'step': 20838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:36.873884', 'step': 20838, 'epoch': 3} {'type': 'loss', 'content': 0.11728397011756897, 'timestamp': '2025-09-10 03:02:36.876094', 'step': 20839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:36.930133', 'step': 20839, 'epoch': 3} {'type': 'loss', 'content': 0.11770833283662796, 'timestamp': '2025-09-10 03:02:36.936338', 'step': 20840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:36.989014', 'step': 20840, 'epoch': 3} {'type': 'loss', 'content': 0.10772835463285446, 'timestamp': '2025-09-10 03:02:36.991958', 'step': 20841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:37.047736', 'step': 20841, 'epoch': 3} {'type': 'loss', 'content': 0.10092593729496002, 'timestamp': '2025-09-10 03:02:37.051823', 'step': 20842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:37.112283', 'step': 20842, 'epoch': 3} {'type': 'loss', 'content': 0.1202663853764534, 'timestamp': '2025-09-10 03:02:37.114744', 'step': 20843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:37.168885', 'step': 20843, 'epoch': 3} {'type': 'loss', 'content': 0.08406155556440353, 'timestamp': '2025-09-10 03:02:37.175144', 'step': 20844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:37.228711', 'step': 20844, 'epoch': 3} {'type': 'loss', 'content': 0.042419858276844025, 'timestamp': '2025-09-10 03:02:37.231023', 'step': 20845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:37.285095', 'step': 20845, 'epoch': 3} {'type': 'loss', 'content': 0.07166910916566849, 'timestamp': '2025-09-10 03:02:37.287449', 'step': 20846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:37.341128', 'step': 20846, 'epoch': 3} {'type': 'loss', 'content': 0.04356737434864044, 'timestamp': '2025-09-10 03:02:37.343518', 'step': 20847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:37.399910', 'step': 20847, 'epoch': 3} {'type': 'loss', 'content': 0.04145468771457672, 'timestamp': '2025-09-10 03:02:37.405936', 'step': 20848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:37.466766', 'step': 20848, 'epoch': 3} {'type': 'loss', 'content': 0.07531798630952835, 'timestamp': '2025-09-10 03:02:37.470094', 'step': 20849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:37.526427', 'step': 20849, 'epoch': 3} {'type': 'loss', 'content': 0.042274098843336105, 'timestamp': '2025-09-10 03:02:37.528765', 'step': 20850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:37.583673', 'step': 20850, 'epoch': 3} {'type': 'loss', 'content': 0.07227632403373718, 'timestamp': '2025-09-10 03:02:37.586257', 'step': 20851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:37.639680', 'step': 20851, 'epoch': 3} {'type': 'loss', 'content': 0.11856314539909363, 'timestamp': '2025-09-10 03:02:37.645865', 'step': 20852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:37.698838', 'step': 20852, 'epoch': 3} {'type': 'loss', 'content': 0.17508935928344727, 'timestamp': '2025-09-10 03:02:37.701214', 'step': 20853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:37.754707', 'step': 20853, 'epoch': 3} {'type': 'loss', 'content': 0.08310630917549133, 'timestamp': '2025-09-10 03:02:37.757190', 'step': 20854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:37.810590', 'step': 20854, 'epoch': 3} {'type': 'loss', 'content': 0.09458594024181366, 'timestamp': '2025-09-10 03:02:37.812860', 'step': 20855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:37.866272', 'step': 20855, 'epoch': 3} {'type': 'loss', 'content': 0.0688491016626358, 'timestamp': '2025-09-10 03:02:37.872258', 'step': 20856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:02:37.927369', 'step': 20856, 'epoch': 3} {'type': 'loss', 'content': 0.05519186332821846, 'timestamp': '2025-09-10 03:02:37.929532', 'step': 20857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:37.984338', 'step': 20857, 'epoch': 3} {'type': 'loss', 'content': 0.08475081622600555, 'timestamp': '2025-09-10 03:02:37.986844', 'step': 20858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:38.041721', 'step': 20858, 'epoch': 3} {'type': 'loss', 'content': 0.11104870587587357, 'timestamp': '2025-09-10 03:02:38.044295', 'step': 20859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:38.099153', 'step': 20859, 'epoch': 3} {'type': 'loss', 'content': 0.14538446068763733, 'timestamp': '2025-09-10 03:02:38.105576', 'step': 20860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:38.159324', 'step': 20860, 'epoch': 3} {'type': 'loss', 'content': 0.11327648162841797, 'timestamp': '2025-09-10 03:02:38.161812', 'step': 20861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:38.216511', 'step': 20861, 'epoch': 3} {'type': 'loss', 'content': 0.0851740688085556, 'timestamp': '2025-09-10 03:02:38.218874', 'step': 20862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:02:38.273965', 'step': 20862, 'epoch': 3} {'type': 'loss', 'content': 0.04494326934218407, 'timestamp': '2025-09-10 03:02:38.276699', 'step': 20863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:38.331551', 'step': 20863, 'epoch': 3} {'type': 'loss', 'content': 0.0575275681912899, 'timestamp': '2025-09-10 03:02:38.337994', 'step': 20864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:38.391684', 'step': 20864, 'epoch': 3} {'type': 'loss', 'content': 0.18820810317993164, 'timestamp': '2025-09-10 03:02:38.394049', 'step': 20865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:38.447938', 'step': 20865, 'epoch': 3} {'type': 'loss', 'content': 0.07748941332101822, 'timestamp': '2025-09-10 03:02:38.450128', 'step': 20866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:38.503733', 'step': 20866, 'epoch': 3} {'type': 'loss', 'content': 0.0888245552778244, 'timestamp': '2025-09-10 03:02:38.505998', 'step': 20867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:38.559637', 'step': 20867, 'epoch': 3} {'type': 'loss', 'content': 0.12850908935070038, 'timestamp': '2025-09-10 03:02:38.565741', 'step': 20868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:38.618510', 'step': 20868, 'epoch': 3} {'type': 'loss', 'content': 0.11467878520488739, 'timestamp': '2025-09-10 03:02:38.620926', 'step': 20869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:38.674100', 'step': 20869, 'epoch': 3} {'type': 'loss', 'content': 0.11979794502258301, 'timestamp': '2025-09-10 03:02:38.676491', 'step': 20870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:38.731145', 'step': 20870, 'epoch': 3} {'type': 'loss', 'content': 0.0937032699584961, 'timestamp': '2025-09-10 03:02:38.733536', 'step': 20871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:38.790907', 'step': 20871, 'epoch': 3} {'type': 'loss', 'content': 0.1154918372631073, 'timestamp': '2025-09-10 03:02:38.797431', 'step': 20872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:38.851091', 'step': 20872, 'epoch': 3} {'type': 'loss', 'content': 0.09391255676746368, 'timestamp': '2025-09-10 03:02:38.853617', 'step': 20873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:38.908793', 'step': 20873, 'epoch': 3} {'type': 'loss', 'content': 0.0803544819355011, 'timestamp': '2025-09-10 03:02:38.911072', 'step': 20874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:38.965301', 'step': 20874, 'epoch': 3} {'type': 'loss', 'content': 0.1164647787809372, 'timestamp': '2025-09-10 03:02:38.967557', 'step': 20875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:39.020726', 'step': 20875, 'epoch': 3} {'type': 'loss', 'content': 0.04859638586640358, 'timestamp': '2025-09-10 03:02:39.026764', 'step': 20876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:39.080192', 'step': 20876, 'epoch': 3} {'type': 'loss', 'content': 0.08960864692926407, 'timestamp': '2025-09-10 03:02:39.082630', 'step': 20877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:39.135928', 'step': 20877, 'epoch': 3} {'type': 'loss', 'content': 0.08120311051607132, 'timestamp': '2025-09-10 03:02:39.138415', 'step': 20878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:39.191456', 'step': 20878, 'epoch': 3} {'type': 'loss', 'content': 0.10198698192834854, 'timestamp': '2025-09-10 03:02:39.193597', 'step': 20879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:39.246479', 'step': 20879, 'epoch': 3} {'type': 'loss', 'content': 0.03535941243171692, 'timestamp': '2025-09-10 03:02:39.252663', 'step': 20880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:39.306502', 'step': 20880, 'epoch': 3} {'type': 'loss', 'content': 0.11339415609836578, 'timestamp': '2025-09-10 03:02:39.308798', 'step': 20881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:39.362734', 'step': 20881, 'epoch': 3} {'type': 'loss', 'content': 0.07611323893070221, 'timestamp': '2025-09-10 03:02:39.365060', 'step': 20882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:39.418187', 'step': 20882, 'epoch': 3} {'type': 'loss', 'content': 0.017287667840719223, 'timestamp': '2025-09-10 03:02:39.420393', 'step': 20883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:39.473122', 'step': 20883, 'epoch': 3} {'type': 'loss', 'content': 0.016159534454345703, 'timestamp': '2025-09-10 03:02:39.479187', 'step': 20884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:39.532074', 'step': 20884, 'epoch': 3} {'type': 'loss', 'content': 0.08741095662117004, 'timestamp': '2025-09-10 03:02:39.534432', 'step': 20885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:39.591753', 'step': 20885, 'epoch': 3} {'type': 'loss', 'content': 0.08540259301662445, 'timestamp': '2025-09-10 03:02:39.594323', 'step': 20886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:39.648526', 'step': 20886, 'epoch': 3} {'type': 'loss', 'content': 0.05242796987295151, 'timestamp': '2025-09-10 03:02:39.651226', 'step': 20887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:02:39.706084', 'step': 20887, 'epoch': 3} {'type': 'loss', 'content': 0.11319763213396072, 'timestamp': '2025-09-10 03:02:39.712183', 'step': 20888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:39.766505', 'step': 20888, 'epoch': 3} {'type': 'loss', 'content': 0.0798381045460701, 'timestamp': '2025-09-10 03:02:39.768797', 'step': 20889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:39.823170', 'step': 20889, 'epoch': 3} {'type': 'loss', 'content': 0.0896788239479065, 'timestamp': '2025-09-10 03:02:39.825523', 'step': 20890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:02:39.878994', 'step': 20890, 'epoch': 3} {'type': 'loss', 'content': 0.04887903109192848, 'timestamp': '2025-09-10 03:02:39.881417', 'step': 20891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 03:02:39.935175', 'step': 20891, 'epoch': 3} {'type': 'loss', 'content': 0.06293721497058868, 'timestamp': '2025-09-10 03:02:39.941356', 'step': 20892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:39.994294', 'step': 20892, 'epoch': 3} {'type': 'loss', 'content': 0.10936218500137329, 'timestamp': '2025-09-10 03:02:39.996592', 'step': 20893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:40.050322', 'step': 20893, 'epoch': 3} {'type': 'loss', 'content': 0.07253127545118332, 'timestamp': '2025-09-10 03:02:40.052645', 'step': 20894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:40.106248', 'step': 20894, 'epoch': 3} {'type': 'loss', 'content': 0.09687306731939316, 'timestamp': '2025-09-10 03:02:40.108609', 'step': 20895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:40.161839', 'step': 20895, 'epoch': 3} {'type': 'loss', 'content': 0.09307496249675751, 'timestamp': '2025-09-10 03:02:40.168014', 'step': 20896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:40.222164', 'step': 20896, 'epoch': 3} {'type': 'loss', 'content': 0.08583063632249832, 'timestamp': '2025-09-10 03:02:40.224475', 'step': 20897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:40.277551', 'step': 20897, 'epoch': 3} {'type': 'loss', 'content': 0.11128099262714386, 'timestamp': '2025-09-10 03:02:40.279854', 'step': 20898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:02:40.333553', 'step': 20898, 'epoch': 3} {'type': 'loss', 'content': 0.12549464404582977, 'timestamp': '2025-09-10 03:02:40.335963', 'step': 20899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 03:02:40.392588', 'step': 20899, 'epoch': 3} {'type': 'loss', 'content': 0.09972929954528809, 'timestamp': '2025-09-10 03:02:40.398741', 'step': 20900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:40.452063', 'step': 20900, 'epoch': 3} {'type': 'loss', 'content': 0.08213061839342117, 'timestamp': '2025-09-10 03:02:40.454488', 'step': 20901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:40.508453', 'step': 20901, 'epoch': 3} {'type': 'loss', 'content': 0.09835588186979294, 'timestamp': '2025-09-10 03:02:40.510821', 'step': 20902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:40.564258', 'step': 20902, 'epoch': 3} {'type': 'loss', 'content': 0.11459524929523468, 'timestamp': '2025-09-10 03:02:40.566464', 'step': 20903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:40.619404', 'step': 20903, 'epoch': 3} {'type': 'loss', 'content': 0.12589368224143982, 'timestamp': '2025-09-10 03:02:40.625372', 'step': 20904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:40.678248', 'step': 20904, 'epoch': 3} {'type': 'loss', 'content': 0.07555575668811798, 'timestamp': '2025-09-10 03:02:40.680558', 'step': 20905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:40.734431', 'step': 20905, 'epoch': 3} {'type': 'loss', 'content': 0.06951931864023209, 'timestamp': '2025-09-10 03:02:40.736770', 'step': 20906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:40.791113', 'step': 20906, 'epoch': 3} {'type': 'loss', 'content': 0.05851299315690994, 'timestamp': '2025-09-10 03:02:40.793454', 'step': 20907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:40.846863', 'step': 20907, 'epoch': 3} {'type': 'loss', 'content': 0.084006167948246, 'timestamp': '2025-09-10 03:02:40.853028', 'step': 20908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:40.906858', 'step': 20908, 'epoch': 3} {'type': 'loss', 'content': 0.058924052864313126, 'timestamp': '2025-09-10 03:02:40.909199', 'step': 20909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:40.963052', 'step': 20909, 'epoch': 3} {'type': 'loss', 'content': 0.13167616724967957, 'timestamp': '2025-09-10 03:02:40.965452', 'step': 20910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:41.018744', 'step': 20910, 'epoch': 3} {'type': 'loss', 'content': 0.07686861604452133, 'timestamp': '2025-09-10 03:02:41.021296', 'step': 20911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:41.075711', 'step': 20911, 'epoch': 3} {'type': 'loss', 'content': 0.03619314730167389, 'timestamp': '2025-09-10 03:02:41.081671', 'step': 20912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:41.135889', 'step': 20912, 'epoch': 3} {'type': 'loss', 'content': 0.07079365104436874, 'timestamp': '2025-09-10 03:02:41.138187', 'step': 20913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:41.191906', 'step': 20913, 'epoch': 3} {'type': 'loss', 'content': 0.1101384237408638, 'timestamp': '2025-09-10 03:02:41.194341', 'step': 20914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:41.247616', 'step': 20914, 'epoch': 3} {'type': 'loss', 'content': 0.14112132787704468, 'timestamp': '2025-09-10 03:02:41.250078', 'step': 20915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:41.303646', 'step': 20915, 'epoch': 3} {'type': 'loss', 'content': 0.07727553695440292, 'timestamp': '2025-09-10 03:02:41.309825', 'step': 20916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:41.363309', 'step': 20916, 'epoch': 3} {'type': 'loss', 'content': 0.10093008726835251, 'timestamp': '2025-09-10 03:02:41.365514', 'step': 20917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:41.421891', 'step': 20917, 'epoch': 3} {'type': 'loss', 'content': 0.09997492283582687, 'timestamp': '2025-09-10 03:02:41.424199', 'step': 20918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:41.477250', 'step': 20918, 'epoch': 3} {'type': 'loss', 'content': 0.13853725790977478, 'timestamp': '2025-09-10 03:02:41.479569', 'step': 20919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:41.532121', 'step': 20919, 'epoch': 3} {'type': 'loss', 'content': 0.1395251452922821, 'timestamp': '2025-09-10 03:02:41.538343', 'step': 20920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:41.591458', 'step': 20920, 'epoch': 3} {'type': 'loss', 'content': 0.08709514886140823, 'timestamp': '2025-09-10 03:02:41.593705', 'step': 20921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:41.647325', 'step': 20921, 'epoch': 3} {'type': 'loss', 'content': 0.08981908112764359, 'timestamp': '2025-09-10 03:02:41.649683', 'step': 20922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:41.703406', 'step': 20922, 'epoch': 3} {'type': 'loss', 'content': 0.09341476112604141, 'timestamp': '2025-09-10 03:02:41.705835', 'step': 20923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:41.761002', 'step': 20923, 'epoch': 3} {'type': 'loss', 'content': 0.1036459431052208, 'timestamp': '2025-09-10 03:02:41.767251', 'step': 20924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:41.820059', 'step': 20924, 'epoch': 3} {'type': 'loss', 'content': 0.10693322867155075, 'timestamp': '2025-09-10 03:02:41.822136', 'step': 20925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:41.878013', 'step': 20925, 'epoch': 3} {'type': 'loss', 'content': 0.052952468395233154, 'timestamp': '2025-09-10 03:02:41.880309', 'step': 20926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:41.935814', 'step': 20926, 'epoch': 3} {'type': 'loss', 'content': 0.17134897410869598, 'timestamp': '2025-09-10 03:02:41.937940', 'step': 20927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:41.994988', 'step': 20927, 'epoch': 3} {'type': 'loss', 'content': 0.04775765538215637, 'timestamp': '2025-09-10 03:02:42.001406', 'step': 20928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:42.054586', 'step': 20928, 'epoch': 3} {'type': 'loss', 'content': 0.07923506945371628, 'timestamp': '2025-09-10 03:02:42.056822', 'step': 20929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:42.110584', 'step': 20929, 'epoch': 3} {'type': 'loss', 'content': 0.11670418083667755, 'timestamp': '2025-09-10 03:02:42.113026', 'step': 20930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:42.167022', 'step': 20930, 'epoch': 3} {'type': 'loss', 'content': 0.15346477925777435, 'timestamp': '2025-09-10 03:02:42.169225', 'step': 20931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:42.222528', 'step': 20931, 'epoch': 3} {'type': 'loss', 'content': 0.025573141872882843, 'timestamp': '2025-09-10 03:02:42.228721', 'step': 20932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:42.281659', 'step': 20932, 'epoch': 3} {'type': 'loss', 'content': 0.13425029814243317, 'timestamp': '2025-09-10 03:02:42.284023', 'step': 20933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:42.337524', 'step': 20933, 'epoch': 3} {'type': 'loss', 'content': 0.0956173911690712, 'timestamp': '2025-09-10 03:02:42.339823', 'step': 20934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:02:42.394105', 'step': 20934, 'epoch': 3} {'type': 'loss', 'content': 0.06220896914601326, 'timestamp': '2025-09-10 03:02:42.396424', 'step': 20935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:42.451161', 'step': 20935, 'epoch': 3} {'type': 'loss', 'content': 0.11238763481378555, 'timestamp': '2025-09-10 03:02:42.457575', 'step': 20936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:42.511702', 'step': 20936, 'epoch': 3} {'type': 'loss', 'content': 0.1005447506904602, 'timestamp': '2025-09-10 03:02:42.514047', 'step': 20937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:42.568090', 'step': 20937, 'epoch': 3} {'type': 'loss', 'content': 0.05795980989933014, 'timestamp': '2025-09-10 03:02:42.570721', 'step': 20938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:42.624474', 'step': 20938, 'epoch': 3} {'type': 'loss', 'content': 0.023586075752973557, 'timestamp': '2025-09-10 03:02:42.626929', 'step': 20939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:42.681014', 'step': 20939, 'epoch': 3} {'type': 'loss', 'content': 0.14118117094039917, 'timestamp': '2025-09-10 03:02:42.687252', 'step': 20940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:42.743631', 'step': 20940, 'epoch': 3} {'type': 'loss', 'content': 0.06950466334819794, 'timestamp': '2025-09-10 03:02:42.745867', 'step': 20941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:42.799023', 'step': 20941, 'epoch': 3} {'type': 'loss', 'content': 0.06708177924156189, 'timestamp': '2025-09-10 03:02:42.801322', 'step': 20942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:42.855952', 'step': 20942, 'epoch': 3} {'type': 'loss', 'content': 0.07211148738861084, 'timestamp': '2025-09-10 03:02:42.858312', 'step': 20943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:42.911785', 'step': 20943, 'epoch': 3} {'type': 'loss', 'content': 0.13153330981731415, 'timestamp': '2025-09-10 03:02:42.917903', 'step': 20944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:42.971064', 'step': 20944, 'epoch': 3} {'type': 'loss', 'content': 0.0642678290605545, 'timestamp': '2025-09-10 03:02:42.973584', 'step': 20945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:43.028064', 'step': 20945, 'epoch': 3} {'type': 'loss', 'content': 0.08285576850175858, 'timestamp': '2025-09-10 03:02:43.030403', 'step': 20946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:43.084049', 'step': 20946, 'epoch': 3} {'type': 'loss', 'content': 0.033119454979896545, 'timestamp': '2025-09-10 03:02:43.086410', 'step': 20947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:43.140068', 'step': 20947, 'epoch': 3} {'type': 'loss', 'content': 0.07925740629434586, 'timestamp': '2025-09-10 03:02:43.146233', 'step': 20948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:43.198708', 'step': 20948, 'epoch': 3} {'type': 'loss', 'content': 0.13577595353126526, 'timestamp': '2025-09-10 03:02:43.201194', 'step': 20949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:43.254788', 'step': 20949, 'epoch': 3} {'type': 'loss', 'content': 0.08457263559103012, 'timestamp': '2025-09-10 03:02:43.257291', 'step': 20950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:43.310793', 'step': 20950, 'epoch': 3} {'type': 'loss', 'content': 0.06605029106140137, 'timestamp': '2025-09-10 03:02:43.313124', 'step': 20951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:43.366853', 'step': 20951, 'epoch': 3} {'type': 'loss', 'content': 0.07621273398399353, 'timestamp': '2025-09-10 03:02:43.372885', 'step': 20952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:43.426431', 'step': 20952, 'epoch': 3} {'type': 'loss', 'content': 0.07389609515666962, 'timestamp': '2025-09-10 03:02:43.428746', 'step': 20953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:43.482479', 'step': 20953, 'epoch': 3} {'type': 'loss', 'content': 0.0793505534529686, 'timestamp': '2025-09-10 03:02:43.484806', 'step': 20954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:43.538959', 'step': 20954, 'epoch': 3} {'type': 'loss', 'content': 0.14269979298114777, 'timestamp': '2025-09-10 03:02:43.541180', 'step': 20955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:43.595630', 'step': 20955, 'epoch': 3} {'type': 'loss', 'content': 0.1876797378063202, 'timestamp': '2025-09-10 03:02:43.601989', 'step': 20956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:43.655415', 'step': 20956, 'epoch': 3} {'type': 'loss', 'content': 0.05481327697634697, 'timestamp': '2025-09-10 03:02:43.657669', 'step': 20957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:43.710643', 'step': 20957, 'epoch': 3} {'type': 'loss', 'content': 0.08466723561286926, 'timestamp': '2025-09-10 03:02:43.713088', 'step': 20958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:43.768403', 'step': 20958, 'epoch': 3} {'type': 'loss', 'content': 0.10377928614616394, 'timestamp': '2025-09-10 03:02:43.771020', 'step': 20959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:43.825081', 'step': 20959, 'epoch': 3} {'type': 'loss', 'content': 0.0893494263291359, 'timestamp': '2025-09-10 03:02:43.830993', 'step': 20960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:43.883352', 'step': 20960, 'epoch': 3} {'type': 'loss', 'content': 0.13724106550216675, 'timestamp': '2025-09-10 03:02:43.885685', 'step': 20961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:43.939193', 'step': 20961, 'epoch': 3} {'type': 'loss', 'content': 0.04759141430258751, 'timestamp': '2025-09-10 03:02:43.941486', 'step': 20962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:43.996120', 'step': 20962, 'epoch': 3} {'type': 'loss', 'content': 0.07140389084815979, 'timestamp': '2025-09-10 03:02:43.998467', 'step': 20963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:44.052246', 'step': 20963, 'epoch': 3} {'type': 'loss', 'content': 0.05762370303273201, 'timestamp': '2025-09-10 03:02:44.058591', 'step': 20964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:44.111638', 'step': 20964, 'epoch': 3} {'type': 'loss', 'content': 0.09819266200065613, 'timestamp': '2025-09-10 03:02:44.113716', 'step': 20965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:44.166787', 'step': 20965, 'epoch': 3} {'type': 'loss', 'content': 0.08468236029148102, 'timestamp': '2025-09-10 03:02:44.169109', 'step': 20966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:44.222141', 'step': 20966, 'epoch': 3} {'type': 'loss', 'content': 0.04901006072759628, 'timestamp': '2025-09-10 03:02:44.224463', 'step': 20967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:44.279378', 'step': 20967, 'epoch': 3} {'type': 'loss', 'content': 0.059470728039741516, 'timestamp': '2025-09-10 03:02:44.285550', 'step': 20968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:44.338407', 'step': 20968, 'epoch': 3} {'type': 'loss', 'content': 0.10745296627283096, 'timestamp': '2025-09-10 03:02:44.340589', 'step': 20969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:44.394916', 'step': 20969, 'epoch': 3} {'type': 'loss', 'content': 0.09671764820814133, 'timestamp': '2025-09-10 03:02:44.397123', 'step': 20970, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 03:02:57.186395', 'step': 20970, 'epoch': 3} {'type': 'pplx', 'content': 10924.58606998657, 'timestamp': '2025-09-10 03:02:57.189395', 'step': 20970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:02:57.246129', 'step': 20970, 'epoch': 3} {'type': 'loss', 'content': 0.1405957043170929, 'timestamp': '2025-09-10 03:02:57.248318', 'step': 20971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:57.303856', 'step': 20971, 'epoch': 3} {'type': 'loss', 'content': 0.05188258737325668, 'timestamp': '2025-09-10 03:02:57.310062', 'step': 20972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:02:57.367447', 'step': 20972, 'epoch': 3} {'type': 'loss', 'content': 0.056196969002485275, 'timestamp': '2025-09-10 03:02:57.369486', 'step': 20973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:57.422726', 'step': 20973, 'epoch': 3} {'type': 'loss', 'content': 0.04695478454232216, 'timestamp': '2025-09-10 03:02:57.424774', 'step': 20974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:57.479515', 'step': 20974, 'epoch': 3} {'type': 'loss', 'content': 0.10234879702329636, 'timestamp': '2025-09-10 03:02:57.481535', 'step': 20975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:57.535212', 'step': 20975, 'epoch': 3} {'type': 'loss', 'content': 0.075356625020504, 'timestamp': '2025-09-10 03:02:57.541526', 'step': 20976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:57.594870', 'step': 20976, 'epoch': 3} {'type': 'loss', 'content': 0.06739760935306549, 'timestamp': '2025-09-10 03:02:57.596804', 'step': 20977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:57.650420', 'step': 20977, 'epoch': 3} {'type': 'loss', 'content': 0.07338471710681915, 'timestamp': '2025-09-10 03:02:57.652742', 'step': 20978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:57.705981', 'step': 20978, 'epoch': 3} {'type': 'loss', 'content': 0.20948870480060577, 'timestamp': '2025-09-10 03:02:57.708199', 'step': 20979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:57.761200', 'step': 20979, 'epoch': 3} {'type': 'loss', 'content': 0.09858469665050507, 'timestamp': '2025-09-10 03:02:57.766974', 'step': 20980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:57.823818', 'step': 20980, 'epoch': 3} {'type': 'loss', 'content': 0.09518377482891083, 'timestamp': '2025-09-10 03:02:57.825930', 'step': 20981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:57.880071', 'step': 20981, 'epoch': 3} {'type': 'loss', 'content': 0.12868507206439972, 'timestamp': '2025-09-10 03:02:57.882225', 'step': 20982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:57.935526', 'step': 20982, 'epoch': 3} {'type': 'loss', 'content': 0.025987502187490463, 'timestamp': '2025-09-10 03:02:57.937657', 'step': 20983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:57.991426', 'step': 20983, 'epoch': 3} {'type': 'loss', 'content': 0.04180249944329262, 'timestamp': '2025-09-10 03:02:57.997528', 'step': 20984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:58.057647', 'step': 20984, 'epoch': 3} {'type': 'loss', 'content': 0.11025318503379822, 'timestamp': '2025-09-10 03:02:58.059830', 'step': 20985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:58.113909', 'step': 20985, 'epoch': 3} {'type': 'loss', 'content': 0.021965375170111656, 'timestamp': '2025-09-10 03:02:58.116048', 'step': 20986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:58.169287', 'step': 20986, 'epoch': 3} {'type': 'loss', 'content': 0.11727997660636902, 'timestamp': '2025-09-10 03:02:58.171528', 'step': 20987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:58.224787', 'step': 20987, 'epoch': 3} {'type': 'loss', 'content': 0.09294327348470688, 'timestamp': '2025-09-10 03:02:58.230564', 'step': 20988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:02:58.284033', 'step': 20988, 'epoch': 3} {'type': 'loss', 'content': 0.049122266471385956, 'timestamp': '2025-09-10 03:02:58.286170', 'step': 20989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:58.339190', 'step': 20989, 'epoch': 3} {'type': 'loss', 'content': 0.05899380147457123, 'timestamp': '2025-09-10 03:02:58.341101', 'step': 20990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:58.394779', 'step': 20990, 'epoch': 3} {'type': 'loss', 'content': 0.11128940433263779, 'timestamp': '2025-09-10 03:02:58.396725', 'step': 20991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:58.450583', 'step': 20991, 'epoch': 3} {'type': 'loss', 'content': 0.05350916460156441, 'timestamp': '2025-09-10 03:02:58.456563', 'step': 20992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:58.510084', 'step': 20992, 'epoch': 3} {'type': 'loss', 'content': 0.06137152016162872, 'timestamp': '2025-09-10 03:02:58.512342', 'step': 20993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:58.570518', 'step': 20993, 'epoch': 3} {'type': 'loss', 'content': 0.06034145876765251, 'timestamp': '2025-09-10 03:02:58.572490', 'step': 20994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:58.644373', 'step': 20994, 'epoch': 3} {'type': 'loss', 'content': 0.06735052913427353, 'timestamp': '2025-09-10 03:02:58.646697', 'step': 20995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:02:58.722300', 'step': 20995, 'epoch': 3} {'type': 'loss', 'content': 0.10109332948923111, 'timestamp': '2025-09-10 03:02:58.728395', 'step': 20996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:58.800852', 'step': 20996, 'epoch': 3} {'type': 'loss', 'content': 0.02317647635936737, 'timestamp': '2025-09-10 03:02:58.803161', 'step': 20997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:58.871442', 'step': 20997, 'epoch': 3} {'type': 'loss', 'content': 0.07030245661735535, 'timestamp': '2025-09-10 03:02:58.873699', 'step': 20998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:58.961366', 'step': 20998, 'epoch': 3} {'type': 'loss', 'content': 0.09304317831993103, 'timestamp': '2025-09-10 03:02:58.963578', 'step': 20999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:59.023352', 'step': 20999, 'epoch': 3} {'type': 'loss', 'content': 0.08631269633769989, 'timestamp': '2025-09-10 03:02:59.029412', 'step': 21000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 21000', 'timestamp': '2025-09-10 03:02:59.437390', 'step': 21000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:02:59.493583', 'step': 21000, 'epoch': 3} {'type': 'loss', 'content': 0.1378532499074936, 'timestamp': '2025-09-10 03:02:59.495655', 'step': 21001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:59.553426', 'step': 21001, 'epoch': 3} {'type': 'loss', 'content': 0.04734275862574577, 'timestamp': '2025-09-10 03:02:59.555527', 'step': 21002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:59.614286', 'step': 21002, 'epoch': 3} {'type': 'loss', 'content': 0.10958435386419296, 'timestamp': '2025-09-10 03:02:59.616285', 'step': 21003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:59.672307', 'step': 21003, 'epoch': 3} {'type': 'loss', 'content': 0.053141746670007706, 'timestamp': '2025-09-10 03:02:59.678465', 'step': 21004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:59.732803', 'step': 21004, 'epoch': 3} {'type': 'loss', 'content': 0.09651198238134384, 'timestamp': '2025-09-10 03:02:59.734945', 'step': 21005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:59.791187', 'step': 21005, 'epoch': 3} {'type': 'loss', 'content': 0.13604629039764404, 'timestamp': '2025-09-10 03:02:59.793717', 'step': 21006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:02:59.849227', 'step': 21006, 'epoch': 3} {'type': 'loss', 'content': 0.10487202554941177, 'timestamp': '2025-09-10 03:02:59.851642', 'step': 21007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:02:59.907376', 'step': 21007, 'epoch': 3} {'type': 'loss', 'content': 0.09861445426940918, 'timestamp': '2025-09-10 03:02:59.913789', 'step': 21008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:02:59.968102', 'step': 21008, 'epoch': 3} {'type': 'loss', 'content': 0.03501664102077484, 'timestamp': '2025-09-10 03:02:59.970418', 'step': 21009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:00.025167', 'step': 21009, 'epoch': 3} {'type': 'loss', 'content': 0.06275171786546707, 'timestamp': '2025-09-10 03:03:00.027625', 'step': 21010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:00.081894', 'step': 21010, 'epoch': 3} {'type': 'loss', 'content': 0.12489020824432373, 'timestamp': '2025-09-10 03:03:00.083957', 'step': 21011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:00.137187', 'step': 21011, 'epoch': 3} {'type': 'loss', 'content': 0.11609721928834915, 'timestamp': '2025-09-10 03:03:00.143300', 'step': 21012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:00.198376', 'step': 21012, 'epoch': 3} {'type': 'loss', 'content': 0.12182927876710892, 'timestamp': '2025-09-10 03:03:00.200589', 'step': 21013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:00.256220', 'step': 21013, 'epoch': 3} {'type': 'loss', 'content': 0.10691341012716293, 'timestamp': '2025-09-10 03:03:00.258305', 'step': 21014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:00.312760', 'step': 21014, 'epoch': 3} {'type': 'loss', 'content': 0.11223337054252625, 'timestamp': '2025-09-10 03:03:00.314772', 'step': 21015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:00.371664', 'step': 21015, 'epoch': 3} {'type': 'loss', 'content': 0.1420508176088333, 'timestamp': '2025-09-10 03:03:00.377896', 'step': 21016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:00.430685', 'step': 21016, 'epoch': 3} {'type': 'loss', 'content': 0.08832062035799026, 'timestamp': '2025-09-10 03:03:00.432626', 'step': 21017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:00.485304', 'step': 21017, 'epoch': 3} {'type': 'loss', 'content': 0.06516684591770172, 'timestamp': '2025-09-10 03:03:00.487351', 'step': 21018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:00.540323', 'step': 21018, 'epoch': 3} {'type': 'loss', 'content': 0.08141695708036423, 'timestamp': '2025-09-10 03:03:00.542316', 'step': 21019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:00.595116', 'step': 21019, 'epoch': 3} {'type': 'loss', 'content': 0.07420797646045685, 'timestamp': '2025-09-10 03:03:00.600758', 'step': 21020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:00.652525', 'step': 21020, 'epoch': 3} {'type': 'loss', 'content': 0.09632453322410583, 'timestamp': '2025-09-10 03:03:00.654465', 'step': 21021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:00.706887', 'step': 21021, 'epoch': 3} {'type': 'loss', 'content': 0.0345400832593441, 'timestamp': '2025-09-10 03:03:00.708999', 'step': 21022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:00.764260', 'step': 21022, 'epoch': 3} {'type': 'loss', 'content': 0.11461538076400757, 'timestamp': '2025-09-10 03:03:00.766474', 'step': 21023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:00.821785', 'step': 21023, 'epoch': 3} {'type': 'loss', 'content': 0.07709810882806778, 'timestamp': '2025-09-10 03:03:00.827607', 'step': 21024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:00.881386', 'step': 21024, 'epoch': 3} {'type': 'loss', 'content': 0.0822896733880043, 'timestamp': '2025-09-10 03:03:00.883822', 'step': 21025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:00.936252', 'step': 21025, 'epoch': 3} {'type': 'loss', 'content': 0.11761897057294846, 'timestamp': '2025-09-10 03:03:00.938428', 'step': 21026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:00.991682', 'step': 21026, 'epoch': 3} {'type': 'loss', 'content': 0.15323768556118011, 'timestamp': '2025-09-10 03:03:00.993683', 'step': 21027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:01.047674', 'step': 21027, 'epoch': 3} {'type': 'loss', 'content': 0.07880756258964539, 'timestamp': '2025-09-10 03:03:01.053424', 'step': 21028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:01.105711', 'step': 21028, 'epoch': 3} {'type': 'loss', 'content': 0.08356034755706787, 'timestamp': '2025-09-10 03:03:01.107713', 'step': 21029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:01.160684', 'step': 21029, 'epoch': 3} {'type': 'loss', 'content': 0.05663250759243965, 'timestamp': '2025-09-10 03:03:01.162767', 'step': 21030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:01.215729', 'step': 21030, 'epoch': 3} {'type': 'loss', 'content': 0.10246502608060837, 'timestamp': '2025-09-10 03:03:01.217885', 'step': 21031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:01.271094', 'step': 21031, 'epoch': 3} {'type': 'loss', 'content': 0.11946725100278854, 'timestamp': '2025-09-10 03:03:01.276875', 'step': 21032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:01.329435', 'step': 21032, 'epoch': 3} {'type': 'loss', 'content': 0.07946734875440598, 'timestamp': '2025-09-10 03:03:01.331377', 'step': 21033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:01.384936', 'step': 21033, 'epoch': 3} {'type': 'loss', 'content': 0.12564072012901306, 'timestamp': '2025-09-10 03:03:01.387105', 'step': 21034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:01.441569', 'step': 21034, 'epoch': 3} {'type': 'loss', 'content': 0.05234275385737419, 'timestamp': '2025-09-10 03:03:01.443492', 'step': 21035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:01.496033', 'step': 21035, 'epoch': 3} {'type': 'loss', 'content': 0.07985152304172516, 'timestamp': '2025-09-10 03:03:01.501914', 'step': 21036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:01.553681', 'step': 21036, 'epoch': 3} {'type': 'loss', 'content': 0.08665875345468521, 'timestamp': '2025-09-10 03:03:01.555677', 'step': 21037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:01.608949', 'step': 21037, 'epoch': 3} {'type': 'loss', 'content': 0.07842781394720078, 'timestamp': '2025-09-10 03:03:01.611179', 'step': 21038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:01.664132', 'step': 21038, 'epoch': 3} {'type': 'loss', 'content': 0.06650084257125854, 'timestamp': '2025-09-10 03:03:01.666400', 'step': 21039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:01.719026', 'step': 21039, 'epoch': 3} {'type': 'loss', 'content': 0.15766675770282745, 'timestamp': '2025-09-10 03:03:01.724724', 'step': 21040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:01.777017', 'step': 21040, 'epoch': 3} {'type': 'loss', 'content': 0.09413578361272812, 'timestamp': '2025-09-10 03:03:01.779145', 'step': 21041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:01.841625', 'step': 21041, 'epoch': 3} {'type': 'loss', 'content': 0.09393827617168427, 'timestamp': '2025-09-10 03:03:01.843561', 'step': 21042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:01.896900', 'step': 21042, 'epoch': 3} {'type': 'loss', 'content': 0.15068426728248596, 'timestamp': '2025-09-10 03:03:01.898815', 'step': 21043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:01.952031', 'step': 21043, 'epoch': 3} {'type': 'loss', 'content': 0.05740409716963768, 'timestamp': '2025-09-10 03:03:01.957902', 'step': 21044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:02.011457', 'step': 21044, 'epoch': 3} {'type': 'loss', 'content': 0.09168461710214615, 'timestamp': '2025-09-10 03:03:02.013429', 'step': 21045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:02.066543', 'step': 21045, 'epoch': 3} {'type': 'loss', 'content': 0.12598252296447754, 'timestamp': '2025-09-10 03:03:02.068504', 'step': 21046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:02.121678', 'step': 21046, 'epoch': 3} {'type': 'loss', 'content': 0.04115432873368263, 'timestamp': '2025-09-10 03:03:02.123547', 'step': 21047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:02.176693', 'step': 21047, 'epoch': 3} {'type': 'loss', 'content': 0.08293559402227402, 'timestamp': '2025-09-10 03:03:02.182385', 'step': 21048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:02.234777', 'step': 21048, 'epoch': 3} {'type': 'loss', 'content': 0.05690658092498779, 'timestamp': '2025-09-10 03:03:02.236852', 'step': 21049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:02.289342', 'step': 21049, 'epoch': 3} {'type': 'loss', 'content': 0.07894469052553177, 'timestamp': '2025-09-10 03:03:02.291447', 'step': 21050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:02.344369', 'step': 21050, 'epoch': 3} {'type': 'loss', 'content': 0.06097037345170975, 'timestamp': '2025-09-10 03:03:02.346325', 'step': 21051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:02.399718', 'step': 21051, 'epoch': 3} {'type': 'loss', 'content': 0.1271578073501587, 'timestamp': '2025-09-10 03:03:02.405733', 'step': 21052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:02.458445', 'step': 21052, 'epoch': 3} {'type': 'loss', 'content': 0.1091030091047287, 'timestamp': '2025-09-10 03:03:02.460813', 'step': 21053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:02.515520', 'step': 21053, 'epoch': 3} {'type': 'loss', 'content': 0.059157419949769974, 'timestamp': '2025-09-10 03:03:02.517961', 'step': 21054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:02.571692', 'step': 21054, 'epoch': 3} {'type': 'loss', 'content': 0.14247877895832062, 'timestamp': '2025-09-10 03:03:02.573845', 'step': 21055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:02.626828', 'step': 21055, 'epoch': 3} {'type': 'loss', 'content': 0.047544926404953, 'timestamp': '2025-09-10 03:03:02.632623', 'step': 21056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:02.684930', 'step': 21056, 'epoch': 3} {'type': 'loss', 'content': 0.08738484233617783, 'timestamp': '2025-09-10 03:03:02.687065', 'step': 21057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:02.740855', 'step': 21057, 'epoch': 3} {'type': 'loss', 'content': 0.18631035089492798, 'timestamp': '2025-09-10 03:03:02.743018', 'step': 21058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:02.796296', 'step': 21058, 'epoch': 3} {'type': 'loss', 'content': 0.14066709578037262, 'timestamp': '2025-09-10 03:03:02.798299', 'step': 21059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:02.852058', 'step': 21059, 'epoch': 3} {'type': 'loss', 'content': 0.06644187122583389, 'timestamp': '2025-09-10 03:03:02.858119', 'step': 21060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:02.911605', 'step': 21060, 'epoch': 3} {'type': 'loss', 'content': 0.03166568651795387, 'timestamp': '2025-09-10 03:03:02.913769', 'step': 21061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:02.968193', 'step': 21061, 'epoch': 3} {'type': 'loss', 'content': 0.13691867887973785, 'timestamp': '2025-09-10 03:03:02.970322', 'step': 21062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:03.025518', 'step': 21062, 'epoch': 3} {'type': 'loss', 'content': 0.07220441102981567, 'timestamp': '2025-09-10 03:03:03.027807', 'step': 21063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:03.080322', 'step': 21063, 'epoch': 3} {'type': 'loss', 'content': 0.06709101796150208, 'timestamp': '2025-09-10 03:03:03.086031', 'step': 21064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:03.138655', 'step': 21064, 'epoch': 3} {'type': 'loss', 'content': 0.1448209583759308, 'timestamp': '2025-09-10 03:03:03.140704', 'step': 21065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:03.193680', 'step': 21065, 'epoch': 3} {'type': 'loss', 'content': 0.056723661720752716, 'timestamp': '2025-09-10 03:03:03.195720', 'step': 21066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:03.249015', 'step': 21066, 'epoch': 3} {'type': 'loss', 'content': 0.08558595925569534, 'timestamp': '2025-09-10 03:03:03.251397', 'step': 21067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:03.304682', 'step': 21067, 'epoch': 3} {'type': 'loss', 'content': 0.1059645563364029, 'timestamp': '2025-09-10 03:03:03.310516', 'step': 21068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:03.363196', 'step': 21068, 'epoch': 3} {'type': 'loss', 'content': 0.09548221528530121, 'timestamp': '2025-09-10 03:03:03.365311', 'step': 21069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:03.418617', 'step': 21069, 'epoch': 3} {'type': 'loss', 'content': 0.09262900799512863, 'timestamp': '2025-09-10 03:03:03.420755', 'step': 21070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:03.473963', 'step': 21070, 'epoch': 3} {'type': 'loss', 'content': 0.08720759302377701, 'timestamp': '2025-09-10 03:03:03.475946', 'step': 21071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:03.531497', 'step': 21071, 'epoch': 3} {'type': 'loss', 'content': 0.07693485915660858, 'timestamp': '2025-09-10 03:03:03.537424', 'step': 21072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:03.590421', 'step': 21072, 'epoch': 3} {'type': 'loss', 'content': 0.1425502896308899, 'timestamp': '2025-09-10 03:03:03.593043', 'step': 21073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:03.646075', 'step': 21073, 'epoch': 3} {'type': 'loss', 'content': 0.06308238953351974, 'timestamp': '2025-09-10 03:03:03.648299', 'step': 21074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:03.700952', 'step': 21074, 'epoch': 3} {'type': 'loss', 'content': 0.06404297053813934, 'timestamp': '2025-09-10 03:03:03.703009', 'step': 21075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:03.756285', 'step': 21075, 'epoch': 3} {'type': 'loss', 'content': 0.14170148968696594, 'timestamp': '2025-09-10 03:03:03.761901', 'step': 21076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:03.814437', 'step': 21076, 'epoch': 3} {'type': 'loss', 'content': 0.10093484073877335, 'timestamp': '2025-09-10 03:03:03.816556', 'step': 21077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:03.869320', 'step': 21077, 'epoch': 3} {'type': 'loss', 'content': 0.2070109099149704, 'timestamp': '2025-09-10 03:03:03.871470', 'step': 21078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:03.924889', 'step': 21078, 'epoch': 3} {'type': 'loss', 'content': 0.07261264324188232, 'timestamp': '2025-09-10 03:03:03.926958', 'step': 21079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:03.981214', 'step': 21079, 'epoch': 3} {'type': 'loss', 'content': 0.040754660964012146, 'timestamp': '2025-09-10 03:03:03.987272', 'step': 21080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:04.041288', 'step': 21080, 'epoch': 3} {'type': 'loss', 'content': 0.07178077846765518, 'timestamp': '2025-09-10 03:03:04.043560', 'step': 21081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:04.096961', 'step': 21081, 'epoch': 3} {'type': 'loss', 'content': 0.09404619038105011, 'timestamp': '2025-09-10 03:03:04.099051', 'step': 21082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:04.152662', 'step': 21082, 'epoch': 3} {'type': 'loss', 'content': 0.10471580922603607, 'timestamp': '2025-09-10 03:03:04.154726', 'step': 21083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:04.208074', 'step': 21083, 'epoch': 3} {'type': 'loss', 'content': 0.11450164765119553, 'timestamp': '2025-09-10 03:03:04.214020', 'step': 21084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:04.271101', 'step': 21084, 'epoch': 3} {'type': 'loss', 'content': 0.0629248172044754, 'timestamp': '2025-09-10 03:03:04.273261', 'step': 21085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:04.326769', 'step': 21085, 'epoch': 3} {'type': 'loss', 'content': 0.0652996227145195, 'timestamp': '2025-09-10 03:03:04.328866', 'step': 21086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:04.382215', 'step': 21086, 'epoch': 3} {'type': 'loss', 'content': 0.1006312295794487, 'timestamp': '2025-09-10 03:03:04.384341', 'step': 21087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:04.437560', 'step': 21087, 'epoch': 3} {'type': 'loss', 'content': 0.06563778966665268, 'timestamp': '2025-09-10 03:03:04.443520', 'step': 21088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:04.497751', 'step': 21088, 'epoch': 3} {'type': 'loss', 'content': 0.07320588082075119, 'timestamp': '2025-09-10 03:03:04.499920', 'step': 21089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:04.553460', 'step': 21089, 'epoch': 3} {'type': 'loss', 'content': 0.06598158925771713, 'timestamp': '2025-09-10 03:03:04.555599', 'step': 21090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:04.609208', 'step': 21090, 'epoch': 3} {'type': 'loss', 'content': 0.10563872754573822, 'timestamp': '2025-09-10 03:03:04.611331', 'step': 21091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:04.664520', 'step': 21091, 'epoch': 3} {'type': 'loss', 'content': 0.07021407783031464, 'timestamp': '2025-09-10 03:03:04.670390', 'step': 21092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:04.722958', 'step': 21092, 'epoch': 3} {'type': 'loss', 'content': 0.019346192479133606, 'timestamp': '2025-09-10 03:03:04.725089', 'step': 21093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:04.778643', 'step': 21093, 'epoch': 3} {'type': 'loss', 'content': 0.12434202432632446, 'timestamp': '2025-09-10 03:03:04.780791', 'step': 21094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:04.834966', 'step': 21094, 'epoch': 3} {'type': 'loss', 'content': 0.04307582229375839, 'timestamp': '2025-09-10 03:03:04.837243', 'step': 21095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:04.890761', 'step': 21095, 'epoch': 3} {'type': 'loss', 'content': 0.09168067574501038, 'timestamp': '2025-09-10 03:03:04.896810', 'step': 21096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:04.949602', 'step': 21096, 'epoch': 3} {'type': 'loss', 'content': 0.12995994091033936, 'timestamp': '2025-09-10 03:03:04.951938', 'step': 21097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:05.006275', 'step': 21097, 'epoch': 3} {'type': 'loss', 'content': 0.13527512550354004, 'timestamp': '2025-09-10 03:03:05.008418', 'step': 21098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:05.063320', 'step': 21098, 'epoch': 3} {'type': 'loss', 'content': 0.04134354367852211, 'timestamp': '2025-09-10 03:03:05.065515', 'step': 21099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:05.118808', 'step': 21099, 'epoch': 3} {'type': 'loss', 'content': 0.13205547630786896, 'timestamp': '2025-09-10 03:03:05.124759', 'step': 21100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:05.177653', 'step': 21100, 'epoch': 3} {'type': 'loss', 'content': 0.052666984498500824, 'timestamp': '2025-09-10 03:03:05.179760', 'step': 21101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:05.234859', 'step': 21101, 'epoch': 3} {'type': 'loss', 'content': 0.07811953127384186, 'timestamp': '2025-09-10 03:03:05.237001', 'step': 21102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:05.290384', 'step': 21102, 'epoch': 3} {'type': 'loss', 'content': 0.14754748344421387, 'timestamp': '2025-09-10 03:03:05.292513', 'step': 21103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:05.346509', 'step': 21103, 'epoch': 3} {'type': 'loss', 'content': 0.08068222552537918, 'timestamp': '2025-09-10 03:03:05.352372', 'step': 21104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:05.405034', 'step': 21104, 'epoch': 3} {'type': 'loss', 'content': 0.055653996765613556, 'timestamp': '2025-09-10 03:03:05.407202', 'step': 21105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:05.460614', 'step': 21105, 'epoch': 3} {'type': 'loss', 'content': 0.052422698587179184, 'timestamp': '2025-09-10 03:03:05.462770', 'step': 21106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:05.516075', 'step': 21106, 'epoch': 3} {'type': 'loss', 'content': 0.103447824716568, 'timestamp': '2025-09-10 03:03:05.518302', 'step': 21107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:05.572506', 'step': 21107, 'epoch': 3} {'type': 'loss', 'content': 0.07105036824941635, 'timestamp': '2025-09-10 03:03:05.578457', 'step': 21108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:05.630978', 'step': 21108, 'epoch': 3} {'type': 'loss', 'content': 0.01943111978471279, 'timestamp': '2025-09-10 03:03:05.633192', 'step': 21109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:05.686677', 'step': 21109, 'epoch': 3} {'type': 'loss', 'content': 0.14586107432842255, 'timestamp': '2025-09-10 03:03:05.688956', 'step': 21110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:05.742691', 'step': 21110, 'epoch': 3} {'type': 'loss', 'content': 0.06911924481391907, 'timestamp': '2025-09-10 03:03:05.745052', 'step': 21111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:05.798508', 'step': 21111, 'epoch': 3} {'type': 'loss', 'content': 0.20258468389511108, 'timestamp': '2025-09-10 03:03:05.804253', 'step': 21112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:05.857862', 'step': 21112, 'epoch': 3} {'type': 'loss', 'content': 0.10108046233654022, 'timestamp': '2025-09-10 03:03:05.860019', 'step': 21113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:05.913529', 'step': 21113, 'epoch': 3} {'type': 'loss', 'content': 0.15760937333106995, 'timestamp': '2025-09-10 03:03:05.915670', 'step': 21114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:05.968684', 'step': 21114, 'epoch': 3} {'type': 'loss', 'content': 0.0606188140809536, 'timestamp': '2025-09-10 03:03:05.970911', 'step': 21115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:06.024598', 'step': 21115, 'epoch': 3} {'type': 'loss', 'content': 0.055699195712804794, 'timestamp': '2025-09-10 03:03:06.030376', 'step': 21116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:06.084378', 'step': 21116, 'epoch': 3} {'type': 'loss', 'content': 0.08265909552574158, 'timestamp': '2025-09-10 03:03:06.086653', 'step': 21117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:06.140092', 'step': 21117, 'epoch': 3} {'type': 'loss', 'content': 0.03047311119735241, 'timestamp': '2025-09-10 03:03:06.142295', 'step': 21118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:06.195910', 'step': 21118, 'epoch': 3} {'type': 'loss', 'content': 0.08461496978998184, 'timestamp': '2025-09-10 03:03:06.198002', 'step': 21119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:06.250802', 'step': 21119, 'epoch': 3} {'type': 'loss', 'content': 0.06131434440612793, 'timestamp': '2025-09-10 03:03:06.256675', 'step': 21120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:06.309273', 'step': 21120, 'epoch': 3} {'type': 'loss', 'content': 0.18468326330184937, 'timestamp': '2025-09-10 03:03:06.311465', 'step': 21121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:06.365595', 'step': 21121, 'epoch': 3} {'type': 'loss', 'content': 0.08671976625919342, 'timestamp': '2025-09-10 03:03:06.367729', 'step': 21122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:06.421198', 'step': 21122, 'epoch': 3} {'type': 'loss', 'content': 0.10516437143087387, 'timestamp': '2025-09-10 03:03:06.423348', 'step': 21123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:06.476899', 'step': 21123, 'epoch': 3} {'type': 'loss', 'content': 0.10966209322214127, 'timestamp': '2025-09-10 03:03:06.482752', 'step': 21124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:06.535716', 'step': 21124, 'epoch': 3} {'type': 'loss', 'content': 0.09391912072896957, 'timestamp': '2025-09-10 03:03:06.538054', 'step': 21125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:06.591565', 'step': 21125, 'epoch': 3} {'type': 'loss', 'content': 0.17985709011554718, 'timestamp': '2025-09-10 03:03:06.593835', 'step': 21126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:06.649140', 'step': 21126, 'epoch': 3} {'type': 'loss', 'content': 0.08056589215993881, 'timestamp': '2025-09-10 03:03:06.651333', 'step': 21127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:06.705275', 'step': 21127, 'epoch': 3} {'type': 'loss', 'content': 0.06572055071592331, 'timestamp': '2025-09-10 03:03:06.711221', 'step': 21128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:06.763782', 'step': 21128, 'epoch': 3} {'type': 'loss', 'content': 0.11052093654870987, 'timestamp': '2025-09-10 03:03:06.766766', 'step': 21129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:06.821381', 'step': 21129, 'epoch': 3} {'type': 'loss', 'content': 0.1131797730922699, 'timestamp': '2025-09-10 03:03:06.823515', 'step': 21130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:06.876976', 'step': 21130, 'epoch': 3} {'type': 'loss', 'content': 0.12694914638996124, 'timestamp': '2025-09-10 03:03:06.879256', 'step': 21131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:06.932117', 'step': 21131, 'epoch': 3} {'type': 'loss', 'content': 0.10230313241481781, 'timestamp': '2025-09-10 03:03:06.937886', 'step': 21132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:06.990647', 'step': 21132, 'epoch': 3} {'type': 'loss', 'content': 0.09851758182048798, 'timestamp': '2025-09-10 03:03:06.992765', 'step': 21133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:07.046678', 'step': 21133, 'epoch': 3} {'type': 'loss', 'content': 0.09232409298419952, 'timestamp': '2025-09-10 03:03:07.048880', 'step': 21134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:07.102514', 'step': 21134, 'epoch': 3} {'type': 'loss', 'content': 0.04828832671046257, 'timestamp': '2025-09-10 03:03:07.104841', 'step': 21135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:07.159676', 'step': 21135, 'epoch': 3} {'type': 'loss', 'content': 0.05745076388120651, 'timestamp': '2025-09-10 03:03:07.165759', 'step': 21136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:07.219432', 'step': 21136, 'epoch': 3} {'type': 'loss', 'content': 0.0739414170384407, 'timestamp': '2025-09-10 03:03:07.221579', 'step': 21137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:07.275550', 'step': 21137, 'epoch': 3} {'type': 'loss', 'content': 0.14605000615119934, 'timestamp': '2025-09-10 03:03:07.277842', 'step': 21138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:07.331451', 'step': 21138, 'epoch': 3} {'type': 'loss', 'content': 0.11288364231586456, 'timestamp': '2025-09-10 03:03:07.333727', 'step': 21139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:07.387420', 'step': 21139, 'epoch': 3} {'type': 'loss', 'content': 0.11497087776660919, 'timestamp': '2025-09-10 03:03:07.393434', 'step': 21140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:07.446296', 'step': 21140, 'epoch': 3} {'type': 'loss', 'content': 0.0706181526184082, 'timestamp': '2025-09-10 03:03:07.448447', 'step': 21141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:07.501668', 'step': 21141, 'epoch': 3} {'type': 'loss', 'content': 0.06358014792203903, 'timestamp': '2025-09-10 03:03:07.503842', 'step': 21142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:07.557251', 'step': 21142, 'epoch': 3} {'type': 'loss', 'content': 0.06653156131505966, 'timestamp': '2025-09-10 03:03:07.559382', 'step': 21143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:07.612510', 'step': 21143, 'epoch': 3} {'type': 'loss', 'content': 0.1147976964712143, 'timestamp': '2025-09-10 03:03:07.618497', 'step': 21144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:07.672313', 'step': 21144, 'epoch': 3} {'type': 'loss', 'content': 0.02561424858868122, 'timestamp': '2025-09-10 03:03:07.674857', 'step': 21145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:07.728654', 'step': 21145, 'epoch': 3} {'type': 'loss', 'content': 0.07613051682710648, 'timestamp': '2025-09-10 03:03:07.731044', 'step': 21146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:07.786169', 'step': 21146, 'epoch': 3} {'type': 'loss', 'content': 0.1085016131401062, 'timestamp': '2025-09-10 03:03:07.788285', 'step': 21147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:07.841734', 'step': 21147, 'epoch': 3} {'type': 'loss', 'content': 0.16524247825145721, 'timestamp': '2025-09-10 03:03:07.847689', 'step': 21148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:07.900994', 'step': 21148, 'epoch': 3} {'type': 'loss', 'content': 0.04841148853302002, 'timestamp': '2025-09-10 03:03:07.903188', 'step': 21149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:07.955759', 'step': 21149, 'epoch': 3} {'type': 'loss', 'content': 0.09110792726278305, 'timestamp': '2025-09-10 03:03:07.957921', 'step': 21150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:08.010902', 'step': 21150, 'epoch': 3} {'type': 'loss', 'content': 0.04831484705209732, 'timestamp': '2025-09-10 03:03:08.013093', 'step': 21151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:08.067119', 'step': 21151, 'epoch': 3} {'type': 'loss', 'content': 0.1471787840127945, 'timestamp': '2025-09-10 03:03:08.072957', 'step': 21152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:08.126027', 'step': 21152, 'epoch': 3} {'type': 'loss', 'content': 0.07593637704849243, 'timestamp': '2025-09-10 03:03:08.128490', 'step': 21153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:08.181856', 'step': 21153, 'epoch': 3} {'type': 'loss', 'content': 0.11929655075073242, 'timestamp': '2025-09-10 03:03:08.184151', 'step': 21154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:08.238800', 'step': 21154, 'epoch': 3} {'type': 'loss', 'content': 0.08580508828163147, 'timestamp': '2025-09-10 03:03:08.240986', 'step': 21155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:08.295007', 'step': 21155, 'epoch': 3} {'type': 'loss', 'content': 0.09533265978097916, 'timestamp': '2025-09-10 03:03:08.301066', 'step': 21156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:08.353362', 'step': 21156, 'epoch': 3} {'type': 'loss', 'content': 0.08479605615139008, 'timestamp': '2025-09-10 03:03:08.355532', 'step': 21157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:08.408901', 'step': 21157, 'epoch': 3} {'type': 'loss', 'content': 0.05405902490019798, 'timestamp': '2025-09-10 03:03:08.411051', 'step': 21158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:08.464165', 'step': 21158, 'epoch': 3} {'type': 'loss', 'content': 0.09428776055574417, 'timestamp': '2025-09-10 03:03:08.466320', 'step': 21159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:08.519580', 'step': 21159, 'epoch': 3} {'type': 'loss', 'content': 0.1289176195859909, 'timestamp': '2025-09-10 03:03:08.525346', 'step': 21160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:08.579003', 'step': 21160, 'epoch': 3} {'type': 'loss', 'content': 0.06083761900663376, 'timestamp': '2025-09-10 03:03:08.581182', 'step': 21161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:08.637204', 'step': 21161, 'epoch': 3} {'type': 'loss', 'content': 0.21385248005390167, 'timestamp': '2025-09-10 03:03:08.639229', 'step': 21162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:08.694498', 'step': 21162, 'epoch': 3} {'type': 'loss', 'content': 0.06865054368972778, 'timestamp': '2025-09-10 03:03:08.696630', 'step': 21163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:08.750013', 'step': 21163, 'epoch': 3} {'type': 'loss', 'content': 0.1110713854432106, 'timestamp': '2025-09-10 03:03:08.756078', 'step': 21164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:08.809548', 'step': 21164, 'epoch': 3} {'type': 'loss', 'content': 0.03382362052798271, 'timestamp': '2025-09-10 03:03:08.811687', 'step': 21165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:08.865033', 'step': 21165, 'epoch': 3} {'type': 'loss', 'content': 0.0686861202120781, 'timestamp': '2025-09-10 03:03:08.867203', 'step': 21166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:08.921947', 'step': 21166, 'epoch': 3} {'type': 'loss', 'content': 0.11192233115434647, 'timestamp': '2025-09-10 03:03:08.924173', 'step': 21167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:08.979878', 'step': 21167, 'epoch': 3} {'type': 'loss', 'content': 0.030066991224884987, 'timestamp': '2025-09-10 03:03:08.985780', 'step': 21168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:09.039998', 'step': 21168, 'epoch': 3} {'type': 'loss', 'content': 0.08932285010814667, 'timestamp': '2025-09-10 03:03:09.042140', 'step': 21169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:09.096958', 'step': 21169, 'epoch': 3} {'type': 'loss', 'content': 0.16400888562202454, 'timestamp': '2025-09-10 03:03:09.099108', 'step': 21170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:09.153655', 'step': 21170, 'epoch': 3} {'type': 'loss', 'content': 0.10179919749498367, 'timestamp': '2025-09-10 03:03:09.155917', 'step': 21171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:09.208822', 'step': 21171, 'epoch': 3} {'type': 'loss', 'content': 0.0934220403432846, 'timestamp': '2025-09-10 03:03:09.215665', 'step': 21172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:09.268733', 'step': 21172, 'epoch': 3} {'type': 'loss', 'content': 0.035968296229839325, 'timestamp': '2025-09-10 03:03:09.270893', 'step': 21173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:09.325378', 'step': 21173, 'epoch': 3} {'type': 'loss', 'content': 0.19382824003696442, 'timestamp': '2025-09-10 03:03:09.327492', 'step': 21174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:09.380752', 'step': 21174, 'epoch': 3} {'type': 'loss', 'content': 0.11482205986976624, 'timestamp': '2025-09-10 03:03:09.382927', 'step': 21175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:03:09.436388', 'step': 21175, 'epoch': 3} {'type': 'loss', 'content': 0.0841941088438034, 'timestamp': '2025-09-10 03:03:09.442303', 'step': 21176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:09.494823', 'step': 21176, 'epoch': 3} {'type': 'loss', 'content': 0.0745898187160492, 'timestamp': '2025-09-10 03:03:09.496952', 'step': 21177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:09.549980', 'step': 21177, 'epoch': 3} {'type': 'loss', 'content': 0.08445335924625397, 'timestamp': '2025-09-10 03:03:09.552121', 'step': 21178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:03:09.606892', 'step': 21178, 'epoch': 3} {'type': 'loss', 'content': 0.12587720155715942, 'timestamp': '2025-09-10 03:03:09.609007', 'step': 21179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:09.662382', 'step': 21179, 'epoch': 3} {'type': 'loss', 'content': 0.06997011601924896, 'timestamp': '2025-09-10 03:03:09.668224', 'step': 21180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:09.720971', 'step': 21180, 'epoch': 3} {'type': 'loss', 'content': 0.027530550956726074, 'timestamp': '2025-09-10 03:03:09.723266', 'step': 21181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:09.776772', 'step': 21181, 'epoch': 3} {'type': 'loss', 'content': 0.03854425996541977, 'timestamp': '2025-09-10 03:03:09.779097', 'step': 21182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:09.833324', 'step': 21182, 'epoch': 3} {'type': 'loss', 'content': 0.07948605716228485, 'timestamp': '2025-09-10 03:03:09.835568', 'step': 21183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:09.889308', 'step': 21183, 'epoch': 3} {'type': 'loss', 'content': 0.13291268050670624, 'timestamp': '2025-09-10 03:03:09.895199', 'step': 21184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:09.948491', 'step': 21184, 'epoch': 3} {'type': 'loss', 'content': 0.08569450676441193, 'timestamp': '2025-09-10 03:03:09.950822', 'step': 21185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:10.004807', 'step': 21185, 'epoch': 3} {'type': 'loss', 'content': 0.09505481272935867, 'timestamp': '2025-09-10 03:03:10.006937', 'step': 21186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:10.061636', 'step': 21186, 'epoch': 3} {'type': 'loss', 'content': 0.09616157412528992, 'timestamp': '2025-09-10 03:03:10.063761', 'step': 21187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:10.120643', 'step': 21187, 'epoch': 3} {'type': 'loss', 'content': 0.10441527515649796, 'timestamp': '2025-09-10 03:03:10.126918', 'step': 21188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:10.180997', 'step': 21188, 'epoch': 3} {'type': 'loss', 'content': 0.11285975575447083, 'timestamp': '2025-09-10 03:03:10.183121', 'step': 21189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:10.236629', 'step': 21189, 'epoch': 3} {'type': 'loss', 'content': 0.12195877730846405, 'timestamp': '2025-09-10 03:03:10.238737', 'step': 21190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:10.292085', 'step': 21190, 'epoch': 3} {'type': 'loss', 'content': 0.08512648195028305, 'timestamp': '2025-09-10 03:03:10.294170', 'step': 21191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:10.347612', 'step': 21191, 'epoch': 3} {'type': 'loss', 'content': 0.07995661348104477, 'timestamp': '2025-09-10 03:03:10.353632', 'step': 21192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:10.406361', 'step': 21192, 'epoch': 3} {'type': 'loss', 'content': 0.060081847012043, 'timestamp': '2025-09-10 03:03:10.408477', 'step': 21193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:10.461208', 'step': 21193, 'epoch': 3} {'type': 'loss', 'content': 0.1410904824733734, 'timestamp': '2025-09-10 03:03:10.463360', 'step': 21194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:10.517265', 'step': 21194, 'epoch': 3} {'type': 'loss', 'content': 0.05626499280333519, 'timestamp': '2025-09-10 03:03:10.519566', 'step': 21195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:10.574226', 'step': 21195, 'epoch': 3} {'type': 'loss', 'content': 0.1498105674982071, 'timestamp': '2025-09-10 03:03:10.580198', 'step': 21196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:10.633425', 'step': 21196, 'epoch': 3} {'type': 'loss', 'content': 0.05021645873785019, 'timestamp': '2025-09-10 03:03:10.635601', 'step': 21197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:10.688727', 'step': 21197, 'epoch': 3} {'type': 'loss', 'content': 0.1160682737827301, 'timestamp': '2025-09-10 03:03:10.690947', 'step': 21198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:10.744525', 'step': 21198, 'epoch': 3} {'type': 'loss', 'content': 0.09324102103710175, 'timestamp': '2025-09-10 03:03:10.746699', 'step': 21199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:10.800225', 'step': 21199, 'epoch': 3} {'type': 'loss', 'content': 0.09926635771989822, 'timestamp': '2025-09-10 03:03:10.806082', 'step': 21200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:10.862785', 'step': 21200, 'epoch': 3} {'type': 'loss', 'content': 0.12593646347522736, 'timestamp': '2025-09-10 03:03:10.864912', 'step': 21201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:10.918108', 'step': 21201, 'epoch': 3} {'type': 'loss', 'content': 0.029101429507136345, 'timestamp': '2025-09-10 03:03:10.920296', 'step': 21202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:10.973237', 'step': 21202, 'epoch': 3} {'type': 'loss', 'content': 0.14139342308044434, 'timestamp': '2025-09-10 03:03:10.975403', 'step': 21203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:11.029266', 'step': 21203, 'epoch': 3} {'type': 'loss', 'content': 0.07580125331878662, 'timestamp': '2025-09-10 03:03:11.036401', 'step': 21204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:11.091436', 'step': 21204, 'epoch': 3} {'type': 'loss', 'content': 0.11845618486404419, 'timestamp': '2025-09-10 03:03:11.093606', 'step': 21205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:11.146959', 'step': 21205, 'epoch': 3} {'type': 'loss', 'content': 0.08378750830888748, 'timestamp': '2025-09-10 03:03:11.149120', 'step': 21206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:11.203314', 'step': 21206, 'epoch': 3} {'type': 'loss', 'content': 0.09166446328163147, 'timestamp': '2025-09-10 03:03:11.205497', 'step': 21207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:11.258653', 'step': 21207, 'epoch': 3} {'type': 'loss', 'content': 0.11212371289730072, 'timestamp': '2025-09-10 03:03:11.264552', 'step': 21208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:11.317277', 'step': 21208, 'epoch': 3} {'type': 'loss', 'content': 0.1024768054485321, 'timestamp': '2025-09-10 03:03:11.319407', 'step': 21209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:11.372570', 'step': 21209, 'epoch': 3} {'type': 'loss', 'content': 0.10834603756666183, 'timestamp': '2025-09-10 03:03:11.374830', 'step': 21210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:11.429659', 'step': 21210, 'epoch': 3} {'type': 'loss', 'content': 0.11678827553987503, 'timestamp': '2025-09-10 03:03:11.431949', 'step': 21211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:11.485763', 'step': 21211, 'epoch': 3} {'type': 'loss', 'content': 0.15243254601955414, 'timestamp': '2025-09-10 03:03:11.491638', 'step': 21212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:11.544946', 'step': 21212, 'epoch': 3} {'type': 'loss', 'content': 0.04919995367527008, 'timestamp': '2025-09-10 03:03:11.547056', 'step': 21213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:11.601440', 'step': 21213, 'epoch': 3} {'type': 'loss', 'content': 0.17877499759197235, 'timestamp': '2025-09-10 03:03:11.603717', 'step': 21214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:11.657865', 'step': 21214, 'epoch': 3} {'type': 'loss', 'content': 0.15410947799682617, 'timestamp': '2025-09-10 03:03:11.660174', 'step': 21215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:11.714054', 'step': 21215, 'epoch': 3} {'type': 'loss', 'content': 0.057113755494356155, 'timestamp': '2025-09-10 03:03:11.720286', 'step': 21216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:11.773005', 'step': 21216, 'epoch': 3} {'type': 'loss', 'content': 0.06780649721622467, 'timestamp': '2025-09-10 03:03:11.775172', 'step': 21217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:11.828297', 'step': 21217, 'epoch': 3} {'type': 'loss', 'content': 0.1825079768896103, 'timestamp': '2025-09-10 03:03:11.830451', 'step': 21218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:11.883445', 'step': 21218, 'epoch': 3} {'type': 'loss', 'content': 0.10078652203083038, 'timestamp': '2025-09-10 03:03:11.885670', 'step': 21219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:11.938991', 'step': 21219, 'epoch': 3} {'type': 'loss', 'content': 0.16764409840106964, 'timestamp': '2025-09-10 03:03:11.944909', 'step': 21220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:11.997530', 'step': 21220, 'epoch': 3} {'type': 'loss', 'content': 0.08784303814172745, 'timestamp': '2025-09-10 03:03:11.999705', 'step': 21221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:12.052745', 'step': 21221, 'epoch': 3} {'type': 'loss', 'content': 0.06023812294006348, 'timestamp': '2025-09-10 03:03:12.054675', 'step': 21222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:12.107663', 'step': 21222, 'epoch': 3} {'type': 'loss', 'content': 0.02803823910653591, 'timestamp': '2025-09-10 03:03:12.109368', 'step': 21223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:12.162568', 'step': 21223, 'epoch': 3} {'type': 'loss', 'content': 0.054854314774274826, 'timestamp': '2025-09-10 03:03:12.168612', 'step': 21224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:12.222434', 'step': 21224, 'epoch': 3} {'type': 'loss', 'content': 0.12549039721488953, 'timestamp': '2025-09-10 03:03:12.224604', 'step': 21225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:12.280406', 'step': 21225, 'epoch': 3} {'type': 'loss', 'content': 0.06634840369224548, 'timestamp': '2025-09-10 03:03:12.282857', 'step': 21226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:12.336512', 'step': 21226, 'epoch': 3} {'type': 'loss', 'content': 0.15452980995178223, 'timestamp': '2025-09-10 03:03:12.338483', 'step': 21227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 03:03:12.392782', 'step': 21227, 'epoch': 3} {'type': 'loss', 'content': 0.04645654559135437, 'timestamp': '2025-09-10 03:03:12.398354', 'step': 21228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:12.458575', 'step': 21228, 'epoch': 3} {'type': 'loss', 'content': 0.06963670998811722, 'timestamp': '2025-09-10 03:03:12.460754', 'step': 21229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:12.514334', 'step': 21229, 'epoch': 3} {'type': 'loss', 'content': 0.08217328786849976, 'timestamp': '2025-09-10 03:03:12.516521', 'step': 21230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:12.574835', 'step': 21230, 'epoch': 3} {'type': 'loss', 'content': 0.08943367004394531, 'timestamp': '2025-09-10 03:03:12.576626', 'step': 21231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:12.629788', 'step': 21231, 'epoch': 3} {'type': 'loss', 'content': 0.016457919031381607, 'timestamp': '2025-09-10 03:03:12.635723', 'step': 21232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:12.689179', 'step': 21232, 'epoch': 3} {'type': 'loss', 'content': 0.1226835697889328, 'timestamp': '2025-09-10 03:03:12.691330', 'step': 21233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:12.747011', 'step': 21233, 'epoch': 3} {'type': 'loss', 'content': 0.08691948652267456, 'timestamp': '2025-09-10 03:03:12.749149', 'step': 21234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:12.803379', 'step': 21234, 'epoch': 3} {'type': 'loss', 'content': 0.18046888709068298, 'timestamp': '2025-09-10 03:03:12.805376', 'step': 21235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:12.859261', 'step': 21235, 'epoch': 3} {'type': 'loss', 'content': 0.19122765958309174, 'timestamp': '2025-09-10 03:03:12.865023', 'step': 21236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:12.918129', 'step': 21236, 'epoch': 3} {'type': 'loss', 'content': 0.1309051364660263, 'timestamp': '2025-09-10 03:03:12.920407', 'step': 21237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:12.973153', 'step': 21237, 'epoch': 3} {'type': 'loss', 'content': 0.1764240264892578, 'timestamp': '2025-09-10 03:03:12.975259', 'step': 21238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:13.028955', 'step': 21238, 'epoch': 3} {'type': 'loss', 'content': 0.1618615686893463, 'timestamp': '2025-09-10 03:03:13.031247', 'step': 21239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:13.084430', 'step': 21239, 'epoch': 3} {'type': 'loss', 'content': 0.06894697993993759, 'timestamp': '2025-09-10 03:03:13.090384', 'step': 21240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:13.142892', 'step': 21240, 'epoch': 3} {'type': 'loss', 'content': 0.11184187978506088, 'timestamp': '2025-09-10 03:03:13.145036', 'step': 21241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:13.197688', 'step': 21241, 'epoch': 3} {'type': 'loss', 'content': 0.14840519428253174, 'timestamp': '2025-09-10 03:03:13.199816', 'step': 21242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:13.255323', 'step': 21242, 'epoch': 3} {'type': 'loss', 'content': 0.0928463563323021, 'timestamp': '2025-09-10 03:03:13.258612', 'step': 21243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:13.312484', 'step': 21243, 'epoch': 3} {'type': 'loss', 'content': 0.09302284568548203, 'timestamp': '2025-09-10 03:03:13.318405', 'step': 21244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:13.371211', 'step': 21244, 'epoch': 3} {'type': 'loss', 'content': 0.08231531083583832, 'timestamp': '2025-09-10 03:03:13.374034', 'step': 21245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:13.435409', 'step': 21245, 'epoch': 3} {'type': 'loss', 'content': 0.05766492336988449, 'timestamp': '2025-09-10 03:03:13.437351', 'step': 21246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:13.489947', 'step': 21246, 'epoch': 3} {'type': 'loss', 'content': 0.05058949813246727, 'timestamp': '2025-09-10 03:03:13.491998', 'step': 21247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:13.553613', 'step': 21247, 'epoch': 3} {'type': 'loss', 'content': 0.049991730600595474, 'timestamp': '2025-09-10 03:03:13.559569', 'step': 21248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:13.612762', 'step': 21248, 'epoch': 3} {'type': 'loss', 'content': 0.08828391879796982, 'timestamp': '2025-09-10 03:03:13.615023', 'step': 21249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:13.668723', 'step': 21249, 'epoch': 3} {'type': 'loss', 'content': 0.10235819220542908, 'timestamp': '2025-09-10 03:03:13.670928', 'step': 21250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:13.724744', 'step': 21250, 'epoch': 3} {'type': 'loss', 'content': 0.11533354222774506, 'timestamp': '2025-09-10 03:03:13.726905', 'step': 21251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:13.780708', 'step': 21251, 'epoch': 3} {'type': 'loss', 'content': 0.06850685179233551, 'timestamp': '2025-09-10 03:03:13.788232', 'step': 21252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:13.844236', 'step': 21252, 'epoch': 3} {'type': 'loss', 'content': 0.05359021946787834, 'timestamp': '2025-09-10 03:03:13.846377', 'step': 21253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:13.904606', 'step': 21253, 'epoch': 3} {'type': 'loss', 'content': 0.0989060178399086, 'timestamp': '2025-09-10 03:03:13.907059', 'step': 21254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:13.964081', 'step': 21254, 'epoch': 3} {'type': 'loss', 'content': 0.10050415992736816, 'timestamp': '2025-09-10 03:03:13.966215', 'step': 21255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:14.028977', 'step': 21255, 'epoch': 3} {'type': 'loss', 'content': 0.14123189449310303, 'timestamp': '2025-09-10 03:03:14.035294', 'step': 21256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:14.089357', 'step': 21256, 'epoch': 3} {'type': 'loss', 'content': 0.05473306030035019, 'timestamp': '2025-09-10 03:03:14.091541', 'step': 21257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:14.152003', 'step': 21257, 'epoch': 3} {'type': 'loss', 'content': 0.08988995850086212, 'timestamp': '2025-09-10 03:03:14.154147', 'step': 21258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:14.208182', 'step': 21258, 'epoch': 3} {'type': 'loss', 'content': 0.08838620781898499, 'timestamp': '2025-09-10 03:03:14.210306', 'step': 21259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:14.262981', 'step': 21259, 'epoch': 3} {'type': 'loss', 'content': 0.0697578564286232, 'timestamp': '2025-09-10 03:03:14.268720', 'step': 21260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:14.322248', 'step': 21260, 'epoch': 3} {'type': 'loss', 'content': 0.08338769525289536, 'timestamp': '2025-09-10 03:03:14.324369', 'step': 21261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:14.377788', 'step': 21261, 'epoch': 3} {'type': 'loss', 'content': 0.10293252766132355, 'timestamp': '2025-09-10 03:03:14.379959', 'step': 21262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:14.433096', 'step': 21262, 'epoch': 3} {'type': 'loss', 'content': 0.1326112598180771, 'timestamp': '2025-09-10 03:03:14.435342', 'step': 21263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:14.488560', 'step': 21263, 'epoch': 3} {'type': 'loss', 'content': 0.055604878813028336, 'timestamp': '2025-09-10 03:03:14.494594', 'step': 21264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:14.547167', 'step': 21264, 'epoch': 3} {'type': 'loss', 'content': 0.12086304277181625, 'timestamp': '2025-09-10 03:03:14.549408', 'step': 21265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:14.602685', 'step': 21265, 'epoch': 3} {'type': 'loss', 'content': 0.04753192886710167, 'timestamp': '2025-09-10 03:03:14.604813', 'step': 21266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:14.659194', 'step': 21266, 'epoch': 3} {'type': 'loss', 'content': 0.08486893028020859, 'timestamp': '2025-09-10 03:03:14.661590', 'step': 21267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:14.714968', 'step': 21267, 'epoch': 3} {'type': 'loss', 'content': 0.06404236704111099, 'timestamp': '2025-09-10 03:03:14.721036', 'step': 21268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:14.774543', 'step': 21268, 'epoch': 3} {'type': 'loss', 'content': 0.05704633146524429, 'timestamp': '2025-09-10 03:03:14.776995', 'step': 21269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:14.830898', 'step': 21269, 'epoch': 3} {'type': 'loss', 'content': 0.11713400483131409, 'timestamp': '2025-09-10 03:03:14.833121', 'step': 21270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:14.887497', 'step': 21270, 'epoch': 3} {'type': 'loss', 'content': 0.05070742219686508, 'timestamp': '2025-09-10 03:03:14.889593', 'step': 21271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:14.943135', 'step': 21271, 'epoch': 3} {'type': 'loss', 'content': 0.06090597063302994, 'timestamp': '2025-09-10 03:03:14.949078', 'step': 21272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:15.002148', 'step': 21272, 'epoch': 3} {'type': 'loss', 'content': 0.08025521785020828, 'timestamp': '2025-09-10 03:03:15.004588', 'step': 21273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:15.059676', 'step': 21273, 'epoch': 3} {'type': 'loss', 'content': 0.09189219027757645, 'timestamp': '2025-09-10 03:03:15.062122', 'step': 21274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:15.115517', 'step': 21274, 'epoch': 3} {'type': 'loss', 'content': 0.11823539435863495, 'timestamp': '2025-09-10 03:03:15.117833', 'step': 21275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:15.171602', 'step': 21275, 'epoch': 3} {'type': 'loss', 'content': 0.09490624815225601, 'timestamp': '2025-09-10 03:03:15.177480', 'step': 21276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:15.230976', 'step': 21276, 'epoch': 3} {'type': 'loss', 'content': 0.06740394234657288, 'timestamp': '2025-09-10 03:03:15.233111', 'step': 21277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:15.287066', 'step': 21277, 'epoch': 3} {'type': 'loss', 'content': 0.1654546558856964, 'timestamp': '2025-09-10 03:03:15.289364', 'step': 21278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:15.343285', 'step': 21278, 'epoch': 3} {'type': 'loss', 'content': 0.09902098029851913, 'timestamp': '2025-09-10 03:03:15.345302', 'step': 21279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:15.399238', 'step': 21279, 'epoch': 3} {'type': 'loss', 'content': 0.18989451229572296, 'timestamp': '2025-09-10 03:03:15.405196', 'step': 21280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:15.461923', 'step': 21280, 'epoch': 3} {'type': 'loss', 'content': 0.07583971321582794, 'timestamp': '2025-09-10 03:03:15.463966', 'step': 21281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:15.516960', 'step': 21281, 'epoch': 3} {'type': 'loss', 'content': 0.06173162907361984, 'timestamp': '2025-09-10 03:03:15.519152', 'step': 21282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:15.579626', 'step': 21282, 'epoch': 3} {'type': 'loss', 'content': 0.0881115049123764, 'timestamp': '2025-09-10 03:03:15.581839', 'step': 21283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:15.635333', 'step': 21283, 'epoch': 3} {'type': 'loss', 'content': 0.15401513874530792, 'timestamp': '2025-09-10 03:03:15.640942', 'step': 21284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:15.693809', 'step': 21284, 'epoch': 3} {'type': 'loss', 'content': 0.07889366894960403, 'timestamp': '2025-09-10 03:03:15.695738', 'step': 21285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:15.748552', 'step': 21285, 'epoch': 3} {'type': 'loss', 'content': 0.06292003393173218, 'timestamp': '2025-09-10 03:03:15.750724', 'step': 21286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:15.804363', 'step': 21286, 'epoch': 3} {'type': 'loss', 'content': 0.05203321948647499, 'timestamp': '2025-09-10 03:03:15.806562', 'step': 21287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:15.859891', 'step': 21287, 'epoch': 3} {'type': 'loss', 'content': 0.08360572904348373, 'timestamp': '2025-09-10 03:03:15.865928', 'step': 21288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:15.919109', 'step': 21288, 'epoch': 3} {'type': 'loss', 'content': 0.09132701903581619, 'timestamp': '2025-09-10 03:03:15.921311', 'step': 21289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:15.974683', 'step': 21289, 'epoch': 3} {'type': 'loss', 'content': 0.13684488832950592, 'timestamp': '2025-09-10 03:03:15.976914', 'step': 21290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:16.040436', 'step': 21290, 'epoch': 3} {'type': 'loss', 'content': 0.09456279128789902, 'timestamp': '2025-09-10 03:03:16.042575', 'step': 21291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:16.095782', 'step': 21291, 'epoch': 3} {'type': 'loss', 'content': 0.04675775766372681, 'timestamp': '2025-09-10 03:03:16.101725', 'step': 21292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:16.155189', 'step': 21292, 'epoch': 3} {'type': 'loss', 'content': 0.0806850716471672, 'timestamp': '2025-09-10 03:03:16.157401', 'step': 21293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:16.210780', 'step': 21293, 'epoch': 3} {'type': 'loss', 'content': 0.04993873089551926, 'timestamp': '2025-09-10 03:03:16.212959', 'step': 21294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:16.266181', 'step': 21294, 'epoch': 3} {'type': 'loss', 'content': 0.05500339716672897, 'timestamp': '2025-09-10 03:03:16.268373', 'step': 21295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:16.321726', 'step': 21295, 'epoch': 3} {'type': 'loss', 'content': 0.11189203709363937, 'timestamp': '2025-09-10 03:03:16.327692', 'step': 21296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:16.380412', 'step': 21296, 'epoch': 3} {'type': 'loss', 'content': 0.08720347285270691, 'timestamp': '2025-09-10 03:03:16.382719', 'step': 21297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:16.436748', 'step': 21297, 'epoch': 3} {'type': 'loss', 'content': 0.09508714079856873, 'timestamp': '2025-09-10 03:03:16.438941', 'step': 21298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:16.494510', 'step': 21298, 'epoch': 3} {'type': 'loss', 'content': 0.1197710931301117, 'timestamp': '2025-09-10 03:03:16.496706', 'step': 21299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:16.551379', 'step': 21299, 'epoch': 3} {'type': 'loss', 'content': 0.13239231705665588, 'timestamp': '2025-09-10 03:03:16.557359', 'step': 21300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:16.618472', 'step': 21300, 'epoch': 3} {'type': 'loss', 'content': 0.042857445776462555, 'timestamp': '2025-09-10 03:03:16.620484', 'step': 21301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:16.678056', 'step': 21301, 'epoch': 3} {'type': 'loss', 'content': 0.11934059858322144, 'timestamp': '2025-09-10 03:03:16.680383', 'step': 21302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:16.743655', 'step': 21302, 'epoch': 3} {'type': 'loss', 'content': 0.05543304234743118, 'timestamp': '2025-09-10 03:03:16.745911', 'step': 21303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:16.814733', 'step': 21303, 'epoch': 3} {'type': 'loss', 'content': 0.07022888958454132, 'timestamp': '2025-09-10 03:03:16.820876', 'step': 21304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:16.891167', 'step': 21304, 'epoch': 3} {'type': 'loss', 'content': 0.08187763392925262, 'timestamp': '2025-09-10 03:03:16.893526', 'step': 21305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:16.947980', 'step': 21305, 'epoch': 3} {'type': 'loss', 'content': 0.14240656793117523, 'timestamp': '2025-09-10 03:03:16.950173', 'step': 21306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:17.004878', 'step': 21306, 'epoch': 3} {'type': 'loss', 'content': 0.17263001203536987, 'timestamp': '2025-09-10 03:03:17.006980', 'step': 21307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:17.060859', 'step': 21307, 'epoch': 3} {'type': 'loss', 'content': 0.1409062147140503, 'timestamp': '2025-09-10 03:03:17.066805', 'step': 21308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:17.119770', 'step': 21308, 'epoch': 3} {'type': 'loss', 'content': 0.09205201268196106, 'timestamp': '2025-09-10 03:03:17.122025', 'step': 21309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:17.175704', 'step': 21309, 'epoch': 3} {'type': 'loss', 'content': 0.08735696226358414, 'timestamp': '2025-09-10 03:03:17.177720', 'step': 21310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:17.231311', 'step': 21310, 'epoch': 3} {'type': 'loss', 'content': 0.1060500517487526, 'timestamp': '2025-09-10 03:03:17.237236', 'step': 21311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:17.293265', 'step': 21311, 'epoch': 3} {'type': 'loss', 'content': 0.14343422651290894, 'timestamp': '2025-09-10 03:03:17.299347', 'step': 21312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:17.352091', 'step': 21312, 'epoch': 3} {'type': 'loss', 'content': 0.07734488695859909, 'timestamp': '2025-09-10 03:03:17.354336', 'step': 21313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:17.417045', 'step': 21313, 'epoch': 3} {'type': 'loss', 'content': 0.14119820296764374, 'timestamp': '2025-09-10 03:03:17.418991', 'step': 21314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:17.472301', 'step': 21314, 'epoch': 3} {'type': 'loss', 'content': 0.072455994784832, 'timestamp': '2025-09-10 03:03:17.474805', 'step': 21315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:17.533753', 'step': 21315, 'epoch': 3} {'type': 'loss', 'content': 0.13474303483963013, 'timestamp': '2025-09-10 03:03:17.546148', 'step': 21316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:17.602015', 'step': 21316, 'epoch': 3} {'type': 'loss', 'content': 0.07173620909452438, 'timestamp': '2025-09-10 03:03:17.604293', 'step': 21317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:17.658647', 'step': 21317, 'epoch': 3} {'type': 'loss', 'content': 0.13385656476020813, 'timestamp': '2025-09-10 03:03:17.660866', 'step': 21318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:17.714635', 'step': 21318, 'epoch': 3} {'type': 'loss', 'content': 0.11699678003787994, 'timestamp': '2025-09-10 03:03:17.716943', 'step': 21319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:17.770245', 'step': 21319, 'epoch': 3} {'type': 'loss', 'content': 0.06159936636686325, 'timestamp': '2025-09-10 03:03:17.776237', 'step': 21320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:17.830004', 'step': 21320, 'epoch': 3} {'type': 'loss', 'content': 0.12908940017223358, 'timestamp': '2025-09-10 03:03:17.832108', 'step': 21321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:17.899011', 'step': 21321, 'epoch': 3} {'type': 'loss', 'content': 0.13952422142028809, 'timestamp': '2025-09-10 03:03:17.901237', 'step': 21322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:17.954450', 'step': 21322, 'epoch': 3} {'type': 'loss', 'content': 0.10448319464921951, 'timestamp': '2025-09-10 03:03:17.956599', 'step': 21323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:18.010668', 'step': 21323, 'epoch': 3} {'type': 'loss', 'content': 0.09954443573951721, 'timestamp': '2025-09-10 03:03:18.016512', 'step': 21324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:18.071091', 'step': 21324, 'epoch': 3} {'type': 'loss', 'content': 0.10853846371173859, 'timestamp': '2025-09-10 03:03:18.073461', 'step': 21325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:18.127092', 'step': 21325, 'epoch': 3} {'type': 'loss', 'content': 0.05758560076355934, 'timestamp': '2025-09-10 03:03:18.129435', 'step': 21326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:18.194671', 'step': 21326, 'epoch': 3} {'type': 'loss', 'content': 0.1115397959947586, 'timestamp': '2025-09-10 03:03:18.197070', 'step': 21327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:18.250448', 'step': 21327, 'epoch': 3} {'type': 'loss', 'content': 0.023921530693769455, 'timestamp': '2025-09-10 03:03:18.256382', 'step': 21328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:18.310690', 'step': 21328, 'epoch': 3} {'type': 'loss', 'content': 0.06704313308000565, 'timestamp': '2025-09-10 03:03:18.312826', 'step': 21329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:18.366656', 'step': 21329, 'epoch': 3} {'type': 'loss', 'content': 0.11102121323347092, 'timestamp': '2025-09-10 03:03:18.368883', 'step': 21330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:18.422160', 'step': 21330, 'epoch': 3} {'type': 'loss', 'content': 0.05040701478719711, 'timestamp': '2025-09-10 03:03:18.424376', 'step': 21331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:18.477151', 'step': 21331, 'epoch': 3} {'type': 'loss', 'content': 0.03538437932729721, 'timestamp': '2025-09-10 03:03:18.482995', 'step': 21332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:03:18.535750', 'step': 21332, 'epoch': 3} {'type': 'loss', 'content': 0.036270659416913986, 'timestamp': '2025-09-10 03:03:18.538314', 'step': 21333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:18.592465', 'step': 21333, 'epoch': 3} {'type': 'loss', 'content': 0.14426937699317932, 'timestamp': '2025-09-10 03:03:18.594780', 'step': 21334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:18.651965', 'step': 21334, 'epoch': 3} {'type': 'loss', 'content': 0.06537249684333801, 'timestamp': '2025-09-10 03:03:18.653888', 'step': 21335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:18.707934', 'step': 21335, 'epoch': 3} {'type': 'loss', 'content': 0.07370973378419876, 'timestamp': '2025-09-10 03:03:18.713860', 'step': 21336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:18.767132', 'step': 21336, 'epoch': 3} {'type': 'loss', 'content': 0.03748529776930809, 'timestamp': '2025-09-10 03:03:18.769274', 'step': 21337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:18.823803', 'step': 21337, 'epoch': 3} {'type': 'loss', 'content': 0.044940974563360214, 'timestamp': '2025-09-10 03:03:18.825954', 'step': 21338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:18.879271', 'step': 21338, 'epoch': 3} {'type': 'loss', 'content': 0.14580830931663513, 'timestamp': '2025-09-10 03:03:18.881606', 'step': 21339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:18.935318', 'step': 21339, 'epoch': 3} {'type': 'loss', 'content': 0.060842789709568024, 'timestamp': '2025-09-10 03:03:18.941371', 'step': 21340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:18.994267', 'step': 21340, 'epoch': 3} {'type': 'loss', 'content': 0.07303255051374435, 'timestamp': '2025-09-10 03:03:18.996468', 'step': 21341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:19.050059', 'step': 21341, 'epoch': 3} {'type': 'loss', 'content': 0.12462534010410309, 'timestamp': '2025-09-10 03:03:19.052200', 'step': 21342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:19.106191', 'step': 21342, 'epoch': 3} {'type': 'loss', 'content': 0.0724533200263977, 'timestamp': '2025-09-10 03:03:19.108477', 'step': 21343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:19.164890', 'step': 21343, 'epoch': 3} {'type': 'loss', 'content': 0.09865687042474747, 'timestamp': '2025-09-10 03:03:19.171108', 'step': 21344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:19.224823', 'step': 21344, 'epoch': 3} {'type': 'loss', 'content': 0.04213276877999306, 'timestamp': '2025-09-10 03:03:19.227101', 'step': 21345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:19.280635', 'step': 21345, 'epoch': 3} {'type': 'loss', 'content': 0.06180691719055176, 'timestamp': '2025-09-10 03:03:19.282849', 'step': 21346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:19.337275', 'step': 21346, 'epoch': 3} {'type': 'loss', 'content': 0.06465917825698853, 'timestamp': '2025-09-10 03:03:19.339533', 'step': 21347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:19.393570', 'step': 21347, 'epoch': 3} {'type': 'loss', 'content': 0.08435256779193878, 'timestamp': '2025-09-10 03:03:19.399504', 'step': 21348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:19.452704', 'step': 21348, 'epoch': 3} {'type': 'loss', 'content': 0.047884244471788406, 'timestamp': '2025-09-10 03:03:19.455011', 'step': 21349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:19.508896', 'step': 21349, 'epoch': 3} {'type': 'loss', 'content': 0.1071101501584053, 'timestamp': '2025-09-10 03:03:19.511052', 'step': 21350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:19.565018', 'step': 21350, 'epoch': 3} {'type': 'loss', 'content': 0.0814780592918396, 'timestamp': '2025-09-10 03:03:19.567659', 'step': 21351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:19.621384', 'step': 21351, 'epoch': 3} {'type': 'loss', 'content': 0.05011413246393204, 'timestamp': '2025-09-10 03:03:19.627351', 'step': 21352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:19.682310', 'step': 21352, 'epoch': 3} {'type': 'loss', 'content': 0.08214857429265976, 'timestamp': '2025-09-10 03:03:19.684468', 'step': 21353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:19.739088', 'step': 21353, 'epoch': 3} {'type': 'loss', 'content': 0.04940514266490936, 'timestamp': '2025-09-10 03:03:19.741409', 'step': 21354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:19.794989', 'step': 21354, 'epoch': 3} {'type': 'loss', 'content': 0.11697495728731155, 'timestamp': '2025-09-10 03:03:19.797393', 'step': 21355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:19.851698', 'step': 21355, 'epoch': 3} {'type': 'loss', 'content': 0.10553240031003952, 'timestamp': '2025-09-10 03:03:19.857719', 'step': 21356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:19.910721', 'step': 21356, 'epoch': 3} {'type': 'loss', 'content': 0.11775070428848267, 'timestamp': '2025-09-10 03:03:19.912946', 'step': 21357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:19.968639', 'step': 21357, 'epoch': 3} {'type': 'loss', 'content': 0.08330030739307404, 'timestamp': '2025-09-10 03:03:19.970746', 'step': 21358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:20.024543', 'step': 21358, 'epoch': 3} {'type': 'loss', 'content': 0.09724894165992737, 'timestamp': '2025-09-10 03:03:20.026602', 'step': 21359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:20.079866', 'step': 21359, 'epoch': 3} {'type': 'loss', 'content': 0.02129201404750347, 'timestamp': '2025-09-10 03:03:20.085669', 'step': 21360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:20.140630', 'step': 21360, 'epoch': 3} {'type': 'loss', 'content': 0.10798800736665726, 'timestamp': '2025-09-10 03:03:20.142813', 'step': 21361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:20.196658', 'step': 21361, 'epoch': 3} {'type': 'loss', 'content': 0.08593587577342987, 'timestamp': '2025-09-10 03:03:20.198749', 'step': 21362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:20.252846', 'step': 21362, 'epoch': 3} {'type': 'loss', 'content': 0.07083553075790405, 'timestamp': '2025-09-10 03:03:20.255120', 'step': 21363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:20.311545', 'step': 21363, 'epoch': 3} {'type': 'loss', 'content': 0.11766123026609421, 'timestamp': '2025-09-10 03:03:20.317391', 'step': 21364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:20.371075', 'step': 21364, 'epoch': 3} {'type': 'loss', 'content': 0.0898113027215004, 'timestamp': '2025-09-10 03:03:20.373595', 'step': 21365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:20.427157', 'step': 21365, 'epoch': 3} {'type': 'loss', 'content': 0.11851572245359421, 'timestamp': '2025-09-10 03:03:20.429395', 'step': 21366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:20.482532', 'step': 21366, 'epoch': 3} {'type': 'loss', 'content': 0.14812277257442474, 'timestamp': '2025-09-10 03:03:20.484844', 'step': 21367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:20.538654', 'step': 21367, 'epoch': 3} {'type': 'loss', 'content': 0.11833663284778595, 'timestamp': '2025-09-10 03:03:20.544697', 'step': 21368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:20.597705', 'step': 21368, 'epoch': 3} {'type': 'loss', 'content': 0.04244137182831764, 'timestamp': '2025-09-10 03:03:20.599993', 'step': 21369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:20.653612', 'step': 21369, 'epoch': 3} {'type': 'loss', 'content': 0.03337760269641876, 'timestamp': '2025-09-10 03:03:20.655817', 'step': 21370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:20.710193', 'step': 21370, 'epoch': 3} {'type': 'loss', 'content': 0.047985006123781204, 'timestamp': '2025-09-10 03:03:20.712410', 'step': 21371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:20.766805', 'step': 21371, 'epoch': 3} {'type': 'loss', 'content': 0.07608148455619812, 'timestamp': '2025-09-10 03:03:20.772809', 'step': 21372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:20.826193', 'step': 21372, 'epoch': 3} {'type': 'loss', 'content': 0.0767849013209343, 'timestamp': '2025-09-10 03:03:20.828354', 'step': 21373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:20.881806', 'step': 21373, 'epoch': 3} {'type': 'loss', 'content': 0.09738489240407944, 'timestamp': '2025-09-10 03:03:20.885031', 'step': 21374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:20.938824', 'step': 21374, 'epoch': 3} {'type': 'loss', 'content': 0.07933545857667923, 'timestamp': '2025-09-10 03:03:20.941085', 'step': 21375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:20.994674', 'step': 21375, 'epoch': 3} {'type': 'loss', 'content': 0.1030532568693161, 'timestamp': '2025-09-10 03:03:21.000788', 'step': 21376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:21.055220', 'step': 21376, 'epoch': 3} {'type': 'loss', 'content': 0.09099987149238586, 'timestamp': '2025-09-10 03:03:21.057375', 'step': 21377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:21.110864', 'step': 21377, 'epoch': 3} {'type': 'loss', 'content': 0.039030980318784714, 'timestamp': '2025-09-10 03:03:21.113063', 'step': 21378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:21.168749', 'step': 21378, 'epoch': 3} {'type': 'loss', 'content': 0.1606270968914032, 'timestamp': '2025-09-10 03:03:21.170969', 'step': 21379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:21.224479', 'step': 21379, 'epoch': 3} {'type': 'loss', 'content': 0.07219807058572769, 'timestamp': '2025-09-10 03:03:21.230544', 'step': 21380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:21.285053', 'step': 21380, 'epoch': 3} {'type': 'loss', 'content': 0.06990218907594681, 'timestamp': '2025-09-10 03:03:21.287338', 'step': 21381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:21.342096', 'step': 21381, 'epoch': 3} {'type': 'loss', 'content': 0.05982380732893944, 'timestamp': '2025-09-10 03:03:21.344438', 'step': 21382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:21.398841', 'step': 21382, 'epoch': 3} {'type': 'loss', 'content': 0.1016668900847435, 'timestamp': '2025-09-10 03:03:21.401158', 'step': 21383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:21.456507', 'step': 21383, 'epoch': 3} {'type': 'loss', 'content': 0.1167493462562561, 'timestamp': '2025-09-10 03:03:21.462455', 'step': 21384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:21.515521', 'step': 21384, 'epoch': 3} {'type': 'loss', 'content': 0.1410626918077469, 'timestamp': '2025-09-10 03:03:21.518954', 'step': 21385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:21.573727', 'step': 21385, 'epoch': 3} {'type': 'loss', 'content': 0.0470418818295002, 'timestamp': '2025-09-10 03:03:21.575962', 'step': 21386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:21.629759', 'step': 21386, 'epoch': 3} {'type': 'loss', 'content': 0.07722306251525879, 'timestamp': '2025-09-10 03:03:21.632022', 'step': 21387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:21.685281', 'step': 21387, 'epoch': 3} {'type': 'loss', 'content': 0.1046331599354744, 'timestamp': '2025-09-10 03:03:21.691133', 'step': 21388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:21.744081', 'step': 21388, 'epoch': 3} {'type': 'loss', 'content': 0.08445621281862259, 'timestamp': '2025-09-10 03:03:21.746241', 'step': 21389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:21.799936', 'step': 21389, 'epoch': 3} {'type': 'loss', 'content': 0.029792865738272667, 'timestamp': '2025-09-10 03:03:21.802101', 'step': 21390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:21.855876', 'step': 21390, 'epoch': 3} {'type': 'loss', 'content': 0.08542879670858383, 'timestamp': '2025-09-10 03:03:21.858192', 'step': 21391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:21.911370', 'step': 21391, 'epoch': 3} {'type': 'loss', 'content': 0.059379398822784424, 'timestamp': '2025-09-10 03:03:21.917534', 'step': 21392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:21.970685', 'step': 21392, 'epoch': 3} {'type': 'loss', 'content': 0.11402358114719391, 'timestamp': '2025-09-10 03:03:21.972747', 'step': 21393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:22.026401', 'step': 21393, 'epoch': 3} {'type': 'loss', 'content': 0.08023307472467422, 'timestamp': '2025-09-10 03:03:22.028654', 'step': 21394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:22.081997', 'step': 21394, 'epoch': 3} {'type': 'loss', 'content': 0.08624908328056335, 'timestamp': '2025-09-10 03:03:22.084268', 'step': 21395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:22.137532', 'step': 21395, 'epoch': 3} {'type': 'loss', 'content': 0.04562019556760788, 'timestamp': '2025-09-10 03:03:22.143500', 'step': 21396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:22.196407', 'step': 21396, 'epoch': 3} {'type': 'loss', 'content': 0.06037251278758049, 'timestamp': '2025-09-10 03:03:22.198772', 'step': 21397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:22.252130', 'step': 21397, 'epoch': 3} {'type': 'loss', 'content': 0.06955475360155106, 'timestamp': '2025-09-10 03:03:22.254574', 'step': 21398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:22.309074', 'step': 21398, 'epoch': 3} {'type': 'loss', 'content': 0.0928860530257225, 'timestamp': '2025-09-10 03:03:22.311424', 'step': 21399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:22.366305', 'step': 21399, 'epoch': 3} {'type': 'loss', 'content': 0.1083463579416275, 'timestamp': '2025-09-10 03:03:22.372493', 'step': 21400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:22.426588', 'step': 21400, 'epoch': 3} {'type': 'loss', 'content': 0.07632026076316833, 'timestamp': '2025-09-10 03:03:22.428840', 'step': 21401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:22.482134', 'step': 21401, 'epoch': 3} {'type': 'loss', 'content': 0.059287674725055695, 'timestamp': '2025-09-10 03:03:22.484195', 'step': 21402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:22.537724', 'step': 21402, 'epoch': 3} {'type': 'loss', 'content': 0.08865302056074142, 'timestamp': '2025-09-10 03:03:22.539926', 'step': 21403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:22.592912', 'step': 21403, 'epoch': 3} {'type': 'loss', 'content': 0.08094950765371323, 'timestamp': '2025-09-10 03:03:22.599226', 'step': 21404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:22.652465', 'step': 21404, 'epoch': 3} {'type': 'loss', 'content': 0.11042977124452591, 'timestamp': '2025-09-10 03:03:22.654941', 'step': 21405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:22.708838', 'step': 21405, 'epoch': 3} {'type': 'loss', 'content': 0.08448243141174316, 'timestamp': '2025-09-10 03:03:22.712610', 'step': 21406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:22.776519', 'step': 21406, 'epoch': 3} {'type': 'loss', 'content': 0.10815801471471786, 'timestamp': '2025-09-10 03:03:22.778573', 'step': 21407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:22.831734', 'step': 21407, 'epoch': 3} {'type': 'loss', 'content': 0.07921711355447769, 'timestamp': '2025-09-10 03:03:22.837687', 'step': 21408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:22.891108', 'step': 21408, 'epoch': 3} {'type': 'loss', 'content': 0.06417296826839447, 'timestamp': '2025-09-10 03:03:22.893217', 'step': 21409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:22.947219', 'step': 21409, 'epoch': 3} {'type': 'loss', 'content': 0.08059346675872803, 'timestamp': '2025-09-10 03:03:22.949584', 'step': 21410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:23.003146', 'step': 21410, 'epoch': 3} {'type': 'loss', 'content': 0.057795166969299316, 'timestamp': '2025-09-10 03:03:23.005541', 'step': 21411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:23.059253', 'step': 21411, 'epoch': 3} {'type': 'loss', 'content': 0.07908573746681213, 'timestamp': '2025-09-10 03:03:23.065360', 'step': 21412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:23.118528', 'step': 21412, 'epoch': 3} {'type': 'loss', 'content': 0.1537141650915146, 'timestamp': '2025-09-10 03:03:23.120793', 'step': 21413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:23.175872', 'step': 21413, 'epoch': 3} {'type': 'loss', 'content': 0.06317580491304398, 'timestamp': '2025-09-10 03:03:23.178047', 'step': 21414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:23.231540', 'step': 21414, 'epoch': 3} {'type': 'loss', 'content': 0.08336000889539719, 'timestamp': '2025-09-10 03:03:23.233594', 'step': 21415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:23.287282', 'step': 21415, 'epoch': 3} {'type': 'loss', 'content': 0.07542133331298828, 'timestamp': '2025-09-10 03:03:23.293017', 'step': 21416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:23.346328', 'step': 21416, 'epoch': 3} {'type': 'loss', 'content': 0.06118205189704895, 'timestamp': '2025-09-10 03:03:23.348464', 'step': 21417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:23.402130', 'step': 21417, 'epoch': 3} {'type': 'loss', 'content': 0.08217819780111313, 'timestamp': '2025-09-10 03:03:23.404410', 'step': 21418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:23.458460', 'step': 21418, 'epoch': 3} {'type': 'loss', 'content': 0.09826619178056717, 'timestamp': '2025-09-10 03:03:23.460706', 'step': 21419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:23.513980', 'step': 21419, 'epoch': 3} {'type': 'loss', 'content': 0.15805189311504364, 'timestamp': '2025-09-10 03:03:23.519917', 'step': 21420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:23.572890', 'step': 21420, 'epoch': 3} {'type': 'loss', 'content': 0.0963933914899826, 'timestamp': '2025-09-10 03:03:23.575241', 'step': 21421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:23.628645', 'step': 21421, 'epoch': 3} {'type': 'loss', 'content': 0.09927339106798172, 'timestamp': '2025-09-10 03:03:23.630746', 'step': 21422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:23.685408', 'step': 21422, 'epoch': 3} {'type': 'loss', 'content': 0.11316283047199249, 'timestamp': '2025-09-10 03:03:23.687642', 'step': 21423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:23.740889', 'step': 21423, 'epoch': 3} {'type': 'loss', 'content': 0.12179465591907501, 'timestamp': '2025-09-10 03:03:23.747013', 'step': 21424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:23.800179', 'step': 21424, 'epoch': 3} {'type': 'loss', 'content': 0.03574135899543762, 'timestamp': '2025-09-10 03:03:23.802617', 'step': 21425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:23.855960', 'step': 21425, 'epoch': 3} {'type': 'loss', 'content': 0.08770447969436646, 'timestamp': '2025-09-10 03:03:23.858335', 'step': 21426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:23.912400', 'step': 21426, 'epoch': 3} {'type': 'loss', 'content': 0.10399533808231354, 'timestamp': '2025-09-10 03:03:23.914681', 'step': 21427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:23.969350', 'step': 21427, 'epoch': 3} {'type': 'loss', 'content': 0.10694768279790878, 'timestamp': '2025-09-10 03:03:23.975557', 'step': 21428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:24.029020', 'step': 21428, 'epoch': 3} {'type': 'loss', 'content': 0.14782635867595673, 'timestamp': '2025-09-10 03:03:24.031349', 'step': 21429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:24.084528', 'step': 21429, 'epoch': 3} {'type': 'loss', 'content': 0.0603957325220108, 'timestamp': '2025-09-10 03:03:24.086765', 'step': 21430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:24.140428', 'step': 21430, 'epoch': 3} {'type': 'loss', 'content': 0.09284838289022446, 'timestamp': '2025-09-10 03:03:24.142689', 'step': 21431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:24.196475', 'step': 21431, 'epoch': 3} {'type': 'loss', 'content': 0.1272052824497223, 'timestamp': '2025-09-10 03:03:24.202488', 'step': 21432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:24.255642', 'step': 21432, 'epoch': 3} {'type': 'loss', 'content': 0.043733201920986176, 'timestamp': '2025-09-10 03:03:24.257877', 'step': 21433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:24.311092', 'step': 21433, 'epoch': 3} {'type': 'loss', 'content': 0.10584169626235962, 'timestamp': '2025-09-10 03:03:24.313335', 'step': 21434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:24.368469', 'step': 21434, 'epoch': 3} {'type': 'loss', 'content': 0.08318376541137695, 'timestamp': '2025-09-10 03:03:24.370757', 'step': 21435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:24.425513', 'step': 21435, 'epoch': 3} {'type': 'loss', 'content': 0.15502139925956726, 'timestamp': '2025-09-10 03:03:24.431588', 'step': 21436, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 03:03:37.064383', 'step': 21436, 'epoch': 3} {'type': 'pplx', 'content': 9934.308335947999, 'timestamp': '2025-09-10 03:03:37.067358', 'step': 21436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:37.120665', 'step': 21436, 'epoch': 3} {'type': 'loss', 'content': 0.04234521463513374, 'timestamp': '2025-09-10 03:03:37.122860', 'step': 21437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:37.177012', 'step': 21437, 'epoch': 3} {'type': 'loss', 'content': 0.07627695798873901, 'timestamp': '2025-09-10 03:03:37.179191', 'step': 21438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:37.235543', 'step': 21438, 'epoch': 3} {'type': 'loss', 'content': 0.12198861688375473, 'timestamp': '2025-09-10 03:03:37.237503', 'step': 21439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:37.291848', 'step': 21439, 'epoch': 3} {'type': 'loss', 'content': 0.039663370698690414, 'timestamp': '2025-09-10 03:03:37.297930', 'step': 21440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:37.351939', 'step': 21440, 'epoch': 3} {'type': 'loss', 'content': 0.056445833295583725, 'timestamp': '2025-09-10 03:03:37.353798', 'step': 21441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:37.407616', 'step': 21441, 'epoch': 3} {'type': 'loss', 'content': 0.12433544546365738, 'timestamp': '2025-09-10 03:03:37.409531', 'step': 21442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:37.463979', 'step': 21442, 'epoch': 3} {'type': 'loss', 'content': 0.07954072952270508, 'timestamp': '2025-09-10 03:03:37.466173', 'step': 21443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:37.520084', 'step': 21443, 'epoch': 3} {'type': 'loss', 'content': 0.10757957398891449, 'timestamp': '2025-09-10 03:03:37.526102', 'step': 21444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:37.578949', 'step': 21444, 'epoch': 3} {'type': 'loss', 'content': 0.14425812661647797, 'timestamp': '2025-09-10 03:03:37.581108', 'step': 21445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:37.634397', 'step': 21445, 'epoch': 3} {'type': 'loss', 'content': 0.11243943125009537, 'timestamp': '2025-09-10 03:03:37.636546', 'step': 21446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:37.689698', 'step': 21446, 'epoch': 3} {'type': 'loss', 'content': 0.15016111731529236, 'timestamp': '2025-09-10 03:03:37.692305', 'step': 21447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:37.747080', 'step': 21447, 'epoch': 3} {'type': 'loss', 'content': 0.07079000025987625, 'timestamp': '2025-09-10 03:03:37.753386', 'step': 21448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:37.807734', 'step': 21448, 'epoch': 3} {'type': 'loss', 'content': 0.08576792478561401, 'timestamp': '2025-09-10 03:03:37.809757', 'step': 21449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:37.864421', 'step': 21449, 'epoch': 3} {'type': 'loss', 'content': 0.14112824201583862, 'timestamp': '2025-09-10 03:03:37.866319', 'step': 21450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:37.920151', 'step': 21450, 'epoch': 3} {'type': 'loss', 'content': 0.09672749787569046, 'timestamp': '2025-09-10 03:03:37.922047', 'step': 21451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:37.979289', 'step': 21451, 'epoch': 3} {'type': 'loss', 'content': 0.08912216871976852, 'timestamp': '2025-09-10 03:03:37.985260', 'step': 21452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:38.038106', 'step': 21452, 'epoch': 3} {'type': 'loss', 'content': 0.06441453844308853, 'timestamp': '2025-09-10 03:03:38.040283', 'step': 21453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:38.093919', 'step': 21453, 'epoch': 3} {'type': 'loss', 'content': 0.07021558284759521, 'timestamp': '2025-09-10 03:03:38.096125', 'step': 21454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:38.150058', 'step': 21454, 'epoch': 3} {'type': 'loss', 'content': 0.011525304988026619, 'timestamp': '2025-09-10 03:03:38.152292', 'step': 21455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:38.207335', 'step': 21455, 'epoch': 3} {'type': 'loss', 'content': 0.027354631572961807, 'timestamp': '2025-09-10 03:03:38.213263', 'step': 21456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:38.265763', 'step': 21456, 'epoch': 3} {'type': 'loss', 'content': 0.013620719313621521, 'timestamp': '2025-09-10 03:03:38.267951', 'step': 21457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:38.321470', 'step': 21457, 'epoch': 3} {'type': 'loss', 'content': 0.09772331267595291, 'timestamp': '2025-09-10 03:03:38.323426', 'step': 21458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:38.378287', 'step': 21458, 'epoch': 3} {'type': 'loss', 'content': 0.11153576523065567, 'timestamp': '2025-09-10 03:03:38.380126', 'step': 21459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:38.433430', 'step': 21459, 'epoch': 3} {'type': 'loss', 'content': 0.05886591225862503, 'timestamp': '2025-09-10 03:03:38.439269', 'step': 21460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:38.491993', 'step': 21460, 'epoch': 3} {'type': 'loss', 'content': 0.0915861502289772, 'timestamp': '2025-09-10 03:03:38.494088', 'step': 21461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:38.546993', 'step': 21461, 'epoch': 3} {'type': 'loss', 'content': 0.09067023545503616, 'timestamp': '2025-09-10 03:03:38.549007', 'step': 21462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:38.602386', 'step': 21462, 'epoch': 3} {'type': 'loss', 'content': 0.08599197119474411, 'timestamp': '2025-09-10 03:03:38.604643', 'step': 21463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:38.657637', 'step': 21463, 'epoch': 3} {'type': 'loss', 'content': 0.0755780041217804, 'timestamp': '2025-09-10 03:03:38.663738', 'step': 21464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:38.716022', 'step': 21464, 'epoch': 3} {'type': 'loss', 'content': 0.044650934636592865, 'timestamp': '2025-09-10 03:03:38.718228', 'step': 21465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:38.772360', 'step': 21465, 'epoch': 3} {'type': 'loss', 'content': 0.032698389142751694, 'timestamp': '2025-09-10 03:03:38.774117', 'step': 21466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:38.828215', 'step': 21466, 'epoch': 3} {'type': 'loss', 'content': 0.026656167581677437, 'timestamp': '2025-09-10 03:03:38.830542', 'step': 21467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:38.884397', 'step': 21467, 'epoch': 3} {'type': 'loss', 'content': 0.07834462821483612, 'timestamp': '2025-09-10 03:03:38.890205', 'step': 21468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:38.943656', 'step': 21468, 'epoch': 3} {'type': 'loss', 'content': 0.07969935983419418, 'timestamp': '2025-09-10 03:03:38.945906', 'step': 21469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:38.999865', 'step': 21469, 'epoch': 3} {'type': 'loss', 'content': 0.07436873763799667, 'timestamp': '2025-09-10 03:03:39.002129', 'step': 21470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:39.056342', 'step': 21470, 'epoch': 3} {'type': 'loss', 'content': 0.15234041213989258, 'timestamp': '2025-09-10 03:03:39.058473', 'step': 21471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:39.111826', 'step': 21471, 'epoch': 3} {'type': 'loss', 'content': 0.05517987161874771, 'timestamp': '2025-09-10 03:03:39.117859', 'step': 21472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:39.170749', 'step': 21472, 'epoch': 3} {'type': 'loss', 'content': 0.06669126451015472, 'timestamp': '2025-09-10 03:03:39.172661', 'step': 21473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:39.225740', 'step': 21473, 'epoch': 3} {'type': 'loss', 'content': 0.018887752667069435, 'timestamp': '2025-09-10 03:03:39.227469', 'step': 21474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:39.283756', 'step': 21474, 'epoch': 3} {'type': 'loss', 'content': 0.10833020508289337, 'timestamp': '2025-09-10 03:03:39.285587', 'step': 21475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:39.339971', 'step': 21475, 'epoch': 3} {'type': 'loss', 'content': 0.18304748833179474, 'timestamp': '2025-09-10 03:03:39.345689', 'step': 21476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:39.399244', 'step': 21476, 'epoch': 3} {'type': 'loss', 'content': 0.08526547253131866, 'timestamp': '2025-09-10 03:03:39.401760', 'step': 21477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:03:39.456443', 'step': 21477, 'epoch': 3} {'type': 'loss', 'content': 0.094355009496212, 'timestamp': '2025-09-10 03:03:39.458968', 'step': 21478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:39.512948', 'step': 21478, 'epoch': 3} {'type': 'loss', 'content': 0.09705045074224472, 'timestamp': '2025-09-10 03:03:39.515029', 'step': 21479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:39.568083', 'step': 21479, 'epoch': 3} {'type': 'loss', 'content': 0.06337335705757141, 'timestamp': '2025-09-10 03:03:39.574243', 'step': 21480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:39.627240', 'step': 21480, 'epoch': 3} {'type': 'loss', 'content': 0.07954654097557068, 'timestamp': '2025-09-10 03:03:39.629387', 'step': 21481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:39.685122', 'step': 21481, 'epoch': 3} {'type': 'loss', 'content': 0.11725763976573944, 'timestamp': '2025-09-10 03:03:39.687321', 'step': 21482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:39.741542', 'step': 21482, 'epoch': 3} {'type': 'loss', 'content': 0.13430255651474, 'timestamp': '2025-09-10 03:03:39.743512', 'step': 21483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:39.796392', 'step': 21483, 'epoch': 3} {'type': 'loss', 'content': 0.13080288469791412, 'timestamp': '2025-09-10 03:03:39.802381', 'step': 21484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:39.854941', 'step': 21484, 'epoch': 3} {'type': 'loss', 'content': 0.10195022076368332, 'timestamp': '2025-09-10 03:03:39.857100', 'step': 21485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:39.911720', 'step': 21485, 'epoch': 3} {'type': 'loss', 'content': 0.09593091905117035, 'timestamp': '2025-09-10 03:03:39.913893', 'step': 21486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:39.967237', 'step': 21486, 'epoch': 3} {'type': 'loss', 'content': 0.08570637553930283, 'timestamp': '2025-09-10 03:03:39.969557', 'step': 21487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:40.024530', 'step': 21487, 'epoch': 3} {'type': 'loss', 'content': 0.07303764671087265, 'timestamp': '2025-09-10 03:03:40.030749', 'step': 21488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:40.083482', 'step': 21488, 'epoch': 3} {'type': 'loss', 'content': 0.10478406399488449, 'timestamp': '2025-09-10 03:03:40.085743', 'step': 21489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:40.139297', 'step': 21489, 'epoch': 3} {'type': 'loss', 'content': 0.08066576719284058, 'timestamp': '2025-09-10 03:03:40.141488', 'step': 21490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:40.196192', 'step': 21490, 'epoch': 3} {'type': 'loss', 'content': 0.1466313898563385, 'timestamp': '2025-09-10 03:03:40.198530', 'step': 21491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:40.251532', 'step': 21491, 'epoch': 3} {'type': 'loss', 'content': 0.08447844535112381, 'timestamp': '2025-09-10 03:03:40.257570', 'step': 21492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:40.312333', 'step': 21492, 'epoch': 3} {'type': 'loss', 'content': 0.060160428285598755, 'timestamp': '2025-09-10 03:03:40.314554', 'step': 21493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:40.367564', 'step': 21493, 'epoch': 3} {'type': 'loss', 'content': 0.07801778614521027, 'timestamp': '2025-09-10 03:03:40.369802', 'step': 21494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:40.422804', 'step': 21494, 'epoch': 3} {'type': 'loss', 'content': 0.11053409427404404, 'timestamp': '2025-09-10 03:03:40.424983', 'step': 21495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:40.478455', 'step': 21495, 'epoch': 3} {'type': 'loss', 'content': 0.06625165045261383, 'timestamp': '2025-09-10 03:03:40.484550', 'step': 21496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:40.538203', 'step': 21496, 'epoch': 3} {'type': 'loss', 'content': 0.06895789504051208, 'timestamp': '2025-09-10 03:03:40.540461', 'step': 21497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:40.594833', 'step': 21497, 'epoch': 3} {'type': 'loss', 'content': 0.07548008859157562, 'timestamp': '2025-09-10 03:03:40.596878', 'step': 21498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:40.650612', 'step': 21498, 'epoch': 3} {'type': 'loss', 'content': 0.06912215054035187, 'timestamp': '2025-09-10 03:03:40.652836', 'step': 21499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:40.706089', 'step': 21499, 'epoch': 3} {'type': 'loss', 'content': 0.1142047569155693, 'timestamp': '2025-09-10 03:03:40.713177', 'step': 21500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 21500', 'timestamp': '2025-09-10 03:03:41.066079', 'step': 21500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:41.122619', 'step': 21500, 'epoch': 3} {'type': 'loss', 'content': 0.1280318945646286, 'timestamp': '2025-09-10 03:03:41.124885', 'step': 21501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:41.178571', 'step': 21501, 'epoch': 3} {'type': 'loss', 'content': 0.04454752802848816, 'timestamp': '2025-09-10 03:03:41.180625', 'step': 21502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:41.234018', 'step': 21502, 'epoch': 3} {'type': 'loss', 'content': 0.06537381559610367, 'timestamp': '2025-09-10 03:03:41.236124', 'step': 21503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:41.288799', 'step': 21503, 'epoch': 3} {'type': 'loss', 'content': 0.0874495804309845, 'timestamp': '2025-09-10 03:03:41.294950', 'step': 21504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:41.347674', 'step': 21504, 'epoch': 3} {'type': 'loss', 'content': 0.17183248698711395, 'timestamp': '2025-09-10 03:03:41.349948', 'step': 21505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:41.402530', 'step': 21505, 'epoch': 3} {'type': 'loss', 'content': 0.03384524956345558, 'timestamp': '2025-09-10 03:03:41.404819', 'step': 21506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:41.458287', 'step': 21506, 'epoch': 3} {'type': 'loss', 'content': 0.07443415373563766, 'timestamp': '2025-09-10 03:03:41.460499', 'step': 21507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:41.515134', 'step': 21507, 'epoch': 3} {'type': 'loss', 'content': 0.10917794704437256, 'timestamp': '2025-09-10 03:03:41.521130', 'step': 21508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:41.574072', 'step': 21508, 'epoch': 3} {'type': 'loss', 'content': 0.11564740538597107, 'timestamp': '2025-09-10 03:03:41.576729', 'step': 21509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:41.630556', 'step': 21509, 'epoch': 3} {'type': 'loss', 'content': 0.07683181017637253, 'timestamp': '2025-09-10 03:03:41.632795', 'step': 21510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:41.687076', 'step': 21510, 'epoch': 3} {'type': 'loss', 'content': 0.0860736146569252, 'timestamp': '2025-09-10 03:03:41.689274', 'step': 21511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:41.744348', 'step': 21511, 'epoch': 3} {'type': 'loss', 'content': 0.13513803482055664, 'timestamp': '2025-09-10 03:03:41.750829', 'step': 21512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:41.809207', 'step': 21512, 'epoch': 3} {'type': 'loss', 'content': 0.0752265602350235, 'timestamp': '2025-09-10 03:03:41.811604', 'step': 21513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:41.866836', 'step': 21513, 'epoch': 3} {'type': 'loss', 'content': 0.08481233566999435, 'timestamp': '2025-09-10 03:03:41.869184', 'step': 21514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:41.922565', 'step': 21514, 'epoch': 3} {'type': 'loss', 'content': 0.055412162095308304, 'timestamp': '2025-09-10 03:03:41.924723', 'step': 21515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:41.978400', 'step': 21515, 'epoch': 3} {'type': 'loss', 'content': 0.07687495648860931, 'timestamp': '2025-09-10 03:03:41.984717', 'step': 21516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:42.037281', 'step': 21516, 'epoch': 3} {'type': 'loss', 'content': 0.10044761747121811, 'timestamp': '2025-09-10 03:03:42.039551', 'step': 21517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:42.092741', 'step': 21517, 'epoch': 3} {'type': 'loss', 'content': 0.01983564719557762, 'timestamp': '2025-09-10 03:03:42.094926', 'step': 21518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:42.148246', 'step': 21518, 'epoch': 3} {'type': 'loss', 'content': 0.030026577413082123, 'timestamp': '2025-09-10 03:03:42.150482', 'step': 21519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:42.203894', 'step': 21519, 'epoch': 3} {'type': 'loss', 'content': 0.06449040025472641, 'timestamp': '2025-09-10 03:03:42.210084', 'step': 21520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:42.263338', 'step': 21520, 'epoch': 3} {'type': 'loss', 'content': 0.056680306792259216, 'timestamp': '2025-09-10 03:03:42.265751', 'step': 21521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:42.320524', 'step': 21521, 'epoch': 3} {'type': 'loss', 'content': 0.07562685012817383, 'timestamp': '2025-09-10 03:03:42.323974', 'step': 21522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:42.378364', 'step': 21522, 'epoch': 3} {'type': 'loss', 'content': 0.1164446696639061, 'timestamp': '2025-09-10 03:03:42.380569', 'step': 21523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:42.433765', 'step': 21523, 'epoch': 3} {'type': 'loss', 'content': 0.15986424684524536, 'timestamp': '2025-09-10 03:03:42.439888', 'step': 21524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:42.493027', 'step': 21524, 'epoch': 3} {'type': 'loss', 'content': 0.04698074609041214, 'timestamp': '2025-09-10 03:03:42.495316', 'step': 21525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:42.548773', 'step': 21525, 'epoch': 3} {'type': 'loss', 'content': 0.06207586079835892, 'timestamp': '2025-09-10 03:03:42.551123', 'step': 21526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:42.604996', 'step': 21526, 'epoch': 3} {'type': 'loss', 'content': 0.05919001251459122, 'timestamp': '2025-09-10 03:03:42.607272', 'step': 21527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:42.660862', 'step': 21527, 'epoch': 3} {'type': 'loss', 'content': 0.06507910788059235, 'timestamp': '2025-09-10 03:03:42.666834', 'step': 21528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:42.719858', 'step': 21528, 'epoch': 3} {'type': 'loss', 'content': 0.042673543095588684, 'timestamp': '2025-09-10 03:03:42.722067', 'step': 21529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:42.775881', 'step': 21529, 'epoch': 3} {'type': 'loss', 'content': 0.12700155377388, 'timestamp': '2025-09-10 03:03:42.777966', 'step': 21530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:42.832921', 'step': 21530, 'epoch': 3} {'type': 'loss', 'content': 0.06870876252651215, 'timestamp': '2025-09-10 03:03:42.834896', 'step': 21531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:42.888499', 'step': 21531, 'epoch': 3} {'type': 'loss', 'content': 0.06385889649391174, 'timestamp': '2025-09-10 03:03:42.894597', 'step': 21532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:42.947666', 'step': 21532, 'epoch': 3} {'type': 'loss', 'content': 0.08773303031921387, 'timestamp': '2025-09-10 03:03:42.949767', 'step': 21533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:43.003005', 'step': 21533, 'epoch': 3} {'type': 'loss', 'content': 0.06956563144922256, 'timestamp': '2025-09-10 03:03:43.005504', 'step': 21534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:43.058762', 'step': 21534, 'epoch': 3} {'type': 'loss', 'content': 0.0633571669459343, 'timestamp': '2025-09-10 03:03:43.061164', 'step': 21535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:43.114771', 'step': 21535, 'epoch': 3} {'type': 'loss', 'content': 0.1194615587592125, 'timestamp': '2025-09-10 03:03:43.120826', 'step': 21536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:43.173556', 'step': 21536, 'epoch': 3} {'type': 'loss', 'content': 0.05090983584523201, 'timestamp': '2025-09-10 03:03:43.175667', 'step': 21537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:43.230524', 'step': 21537, 'epoch': 3} {'type': 'loss', 'content': 0.019526401534676552, 'timestamp': '2025-09-10 03:03:43.232689', 'step': 21538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:43.286852', 'step': 21538, 'epoch': 3} {'type': 'loss', 'content': 0.07473175972700119, 'timestamp': '2025-09-10 03:03:43.289047', 'step': 21539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:43.342569', 'step': 21539, 'epoch': 3} {'type': 'loss', 'content': 0.03621312975883484, 'timestamp': '2025-09-10 03:03:43.348722', 'step': 21540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:43.401651', 'step': 21540, 'epoch': 3} {'type': 'loss', 'content': 0.07199631631374359, 'timestamp': '2025-09-10 03:03:43.403794', 'step': 21541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:43.456399', 'step': 21541, 'epoch': 3} {'type': 'loss', 'content': 0.10138771682977676, 'timestamp': '2025-09-10 03:03:43.458574', 'step': 21542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:43.512011', 'step': 21542, 'epoch': 3} {'type': 'loss', 'content': 0.09939475357532501, 'timestamp': '2025-09-10 03:03:43.514193', 'step': 21543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:43.566879', 'step': 21543, 'epoch': 3} {'type': 'loss', 'content': 0.05658012256026268, 'timestamp': '2025-09-10 03:03:43.572954', 'step': 21544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:43.625091', 'step': 21544, 'epoch': 3} {'type': 'loss', 'content': 0.13180340826511383, 'timestamp': '2025-09-10 03:03:43.627234', 'step': 21545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:43.680188', 'step': 21545, 'epoch': 3} {'type': 'loss', 'content': 0.0755283385515213, 'timestamp': '2025-09-10 03:03:43.682401', 'step': 21546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:43.735235', 'step': 21546, 'epoch': 3} {'type': 'loss', 'content': 0.021013706922531128, 'timestamp': '2025-09-10 03:03:43.737342', 'step': 21547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:43.790548', 'step': 21547, 'epoch': 3} {'type': 'loss', 'content': 0.08050161600112915, 'timestamp': '2025-09-10 03:03:43.796503', 'step': 21548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:43.850383', 'step': 21548, 'epoch': 3} {'type': 'loss', 'content': 0.1831098347902298, 'timestamp': '2025-09-10 03:03:43.852660', 'step': 21549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:43.906645', 'step': 21549, 'epoch': 3} {'type': 'loss', 'content': 0.0818910002708435, 'timestamp': '2025-09-10 03:03:43.908813', 'step': 21550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:43.961822', 'step': 21550, 'epoch': 3} {'type': 'loss', 'content': 0.08957019448280334, 'timestamp': '2025-09-10 03:03:43.964044', 'step': 21551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:44.017519', 'step': 21551, 'epoch': 3} {'type': 'loss', 'content': 0.04785297065973282, 'timestamp': '2025-09-10 03:03:44.023549', 'step': 21552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:44.076347', 'step': 21552, 'epoch': 3} {'type': 'loss', 'content': 0.053909122943878174, 'timestamp': '2025-09-10 03:03:44.078583', 'step': 21553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:44.131476', 'step': 21553, 'epoch': 3} {'type': 'loss', 'content': 0.05177983641624451, 'timestamp': '2025-09-10 03:03:44.133629', 'step': 21554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:44.186433', 'step': 21554, 'epoch': 3} {'type': 'loss', 'content': 0.1041819155216217, 'timestamp': '2025-09-10 03:03:44.188574', 'step': 21555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:44.253655', 'step': 21555, 'epoch': 3} {'type': 'loss', 'content': 0.0954039916396141, 'timestamp': '2025-09-10 03:03:44.259467', 'step': 21556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:44.312189', 'step': 21556, 'epoch': 3} {'type': 'loss', 'content': 0.009554845280945301, 'timestamp': '2025-09-10 03:03:44.314680', 'step': 21557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:44.369387', 'step': 21557, 'epoch': 3} {'type': 'loss', 'content': 0.07674869894981384, 'timestamp': '2025-09-10 03:03:44.371624', 'step': 21558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:44.425699', 'step': 21558, 'epoch': 3} {'type': 'loss', 'content': 0.08992483466863632, 'timestamp': '2025-09-10 03:03:44.429303', 'step': 21559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:44.483660', 'step': 21559, 'epoch': 3} {'type': 'loss', 'content': 0.14048755168914795, 'timestamp': '2025-09-10 03:03:44.489641', 'step': 21560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:44.543554', 'step': 21560, 'epoch': 3} {'type': 'loss', 'content': 0.10435669869184494, 'timestamp': '2025-09-10 03:03:44.545684', 'step': 21561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:44.598831', 'step': 21561, 'epoch': 3} {'type': 'loss', 'content': 0.032329678535461426, 'timestamp': '2025-09-10 03:03:44.601017', 'step': 21562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:03:44.662860', 'step': 21562, 'epoch': 3} {'type': 'loss', 'content': 0.1408429592847824, 'timestamp': '2025-09-10 03:03:44.665145', 'step': 21563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:44.723244', 'step': 21563, 'epoch': 3} {'type': 'loss', 'content': 0.09164092689752579, 'timestamp': '2025-09-10 03:03:44.729308', 'step': 21564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:44.782006', 'step': 21564, 'epoch': 3} {'type': 'loss', 'content': 0.04616902023553848, 'timestamp': '2025-09-10 03:03:44.784482', 'step': 21565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:44.837263', 'step': 21565, 'epoch': 3} {'type': 'loss', 'content': 0.0489049106836319, 'timestamp': '2025-09-10 03:03:44.839486', 'step': 21566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:44.893496', 'step': 21566, 'epoch': 3} {'type': 'loss', 'content': 0.027555638924241066, 'timestamp': '2025-09-10 03:03:44.895647', 'step': 21567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:44.949568', 'step': 21567, 'epoch': 3} {'type': 'loss', 'content': 0.046178773045539856, 'timestamp': '2025-09-10 03:03:44.955653', 'step': 21568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:45.010048', 'step': 21568, 'epoch': 3} {'type': 'loss', 'content': 0.13339868187904358, 'timestamp': '2025-09-10 03:03:45.012191', 'step': 21569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:45.065427', 'step': 21569, 'epoch': 3} {'type': 'loss', 'content': 0.049557629972696304, 'timestamp': '2025-09-10 03:03:45.067585', 'step': 21570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:45.121478', 'step': 21570, 'epoch': 3} {'type': 'loss', 'content': 0.042837101966142654, 'timestamp': '2025-09-10 03:03:45.123715', 'step': 21571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:45.176642', 'step': 21571, 'epoch': 3} {'type': 'loss', 'content': 0.09531830251216888, 'timestamp': '2025-09-10 03:03:45.182493', 'step': 21572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:45.234951', 'step': 21572, 'epoch': 3} {'type': 'loss', 'content': 0.11534120887517929, 'timestamp': '2025-09-10 03:03:45.237099', 'step': 21573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:45.289892', 'step': 21573, 'epoch': 3} {'type': 'loss', 'content': 0.07327202707529068, 'timestamp': '2025-09-10 03:03:45.292051', 'step': 21574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:45.345945', 'step': 21574, 'epoch': 3} {'type': 'loss', 'content': 0.11614158749580383, 'timestamp': '2025-09-10 03:03:45.348112', 'step': 21575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:45.401699', 'step': 21575, 'epoch': 3} {'type': 'loss', 'content': 0.06345299631357193, 'timestamp': '2025-09-10 03:03:45.407430', 'step': 21576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:45.459473', 'step': 21576, 'epoch': 3} {'type': 'loss', 'content': 0.04796513542532921, 'timestamp': '2025-09-10 03:03:45.461761', 'step': 21577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:45.516777', 'step': 21577, 'epoch': 3} {'type': 'loss', 'content': 0.17650830745697021, 'timestamp': '2025-09-10 03:03:45.519132', 'step': 21578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:45.572569', 'step': 21578, 'epoch': 3} {'type': 'loss', 'content': 0.045537762343883514, 'timestamp': '2025-09-10 03:03:45.574739', 'step': 21579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:45.628275', 'step': 21579, 'epoch': 3} {'type': 'loss', 'content': 0.08225959539413452, 'timestamp': '2025-09-10 03:03:45.634247', 'step': 21580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:45.687167', 'step': 21580, 'epoch': 3} {'type': 'loss', 'content': 0.02992241643369198, 'timestamp': '2025-09-10 03:03:45.689407', 'step': 21581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:45.742369', 'step': 21581, 'epoch': 3} {'type': 'loss', 'content': 0.042232315987348557, 'timestamp': '2025-09-10 03:03:45.744505', 'step': 21582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:45.798121', 'step': 21582, 'epoch': 3} {'type': 'loss', 'content': 0.053175196051597595, 'timestamp': '2025-09-10 03:03:45.800315', 'step': 21583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:45.853711', 'step': 21583, 'epoch': 3} {'type': 'loss', 'content': 0.06741795688867569, 'timestamp': '2025-09-10 03:03:45.859499', 'step': 21584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:45.912061', 'step': 21584, 'epoch': 3} {'type': 'loss', 'content': 0.02912634238600731, 'timestamp': '2025-09-10 03:03:45.914224', 'step': 21585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:45.968246', 'step': 21585, 'epoch': 3} {'type': 'loss', 'content': 0.028679823502898216, 'timestamp': '2025-09-10 03:03:45.970457', 'step': 21586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:46.023729', 'step': 21586, 'epoch': 3} {'type': 'loss', 'content': 0.21046678721904755, 'timestamp': '2025-09-10 03:03:46.025911', 'step': 21587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:46.079370', 'step': 21587, 'epoch': 3} {'type': 'loss', 'content': 0.04644036293029785, 'timestamp': '2025-09-10 03:03:46.085333', 'step': 21588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:46.137845', 'step': 21588, 'epoch': 3} {'type': 'loss', 'content': 0.08767873048782349, 'timestamp': '2025-09-10 03:03:46.140176', 'step': 21589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:46.192646', 'step': 21589, 'epoch': 3} {'type': 'loss', 'content': 0.09032493829727173, 'timestamp': '2025-09-10 03:03:46.195114', 'step': 21590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:46.248595', 'step': 21590, 'epoch': 3} {'type': 'loss', 'content': 0.005757214035838842, 'timestamp': '2025-09-10 03:03:46.251248', 'step': 21591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:46.304821', 'step': 21591, 'epoch': 3} {'type': 'loss', 'content': 0.028995456174016, 'timestamp': '2025-09-10 03:03:46.311212', 'step': 21592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:46.363318', 'step': 21592, 'epoch': 3} {'type': 'loss', 'content': 0.07379013299942017, 'timestamp': '2025-09-10 03:03:46.365443', 'step': 21593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:46.419685', 'step': 21593, 'epoch': 3} {'type': 'loss', 'content': 0.148953378200531, 'timestamp': '2025-09-10 03:03:46.421831', 'step': 21594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:46.493342', 'step': 21594, 'epoch': 3} {'type': 'loss', 'content': 0.16704772412776947, 'timestamp': '2025-09-10 03:03:46.495504', 'step': 21595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:46.548382', 'step': 21595, 'epoch': 3} {'type': 'loss', 'content': 0.06825009733438492, 'timestamp': '2025-09-10 03:03:46.554145', 'step': 21596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:46.609338', 'step': 21596, 'epoch': 3} {'type': 'loss', 'content': 0.0663786455988884, 'timestamp': '2025-09-10 03:03:46.611494', 'step': 21597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:46.665122', 'step': 21597, 'epoch': 3} {'type': 'loss', 'content': 0.055146459490060806, 'timestamp': '2025-09-10 03:03:46.667358', 'step': 21598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:46.720753', 'step': 21598, 'epoch': 3} {'type': 'loss', 'content': 0.15872372686862946, 'timestamp': '2025-09-10 03:03:46.722892', 'step': 21599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:46.776199', 'step': 21599, 'epoch': 3} {'type': 'loss', 'content': 0.07554484158754349, 'timestamp': '2025-09-10 03:03:46.782123', 'step': 21600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:46.835748', 'step': 21600, 'epoch': 3} {'type': 'loss', 'content': 0.04166435822844505, 'timestamp': '2025-09-10 03:03:46.838329', 'step': 21601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:46.892658', 'step': 21601, 'epoch': 3} {'type': 'loss', 'content': 0.055884987115859985, 'timestamp': '2025-09-10 03:03:46.894810', 'step': 21602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:46.949642', 'step': 21602, 'epoch': 3} {'type': 'loss', 'content': 0.08678284287452698, 'timestamp': '2025-09-10 03:03:46.951747', 'step': 21603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:47.004908', 'step': 21603, 'epoch': 3} {'type': 'loss', 'content': 0.09002412855625153, 'timestamp': '2025-09-10 03:03:47.010750', 'step': 21604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:47.064284', 'step': 21604, 'epoch': 3} {'type': 'loss', 'content': 0.14844326674938202, 'timestamp': '2025-09-10 03:03:47.067724', 'step': 21605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:47.122927', 'step': 21605, 'epoch': 3} {'type': 'loss', 'content': 0.04218701273202896, 'timestamp': '2025-09-10 03:03:47.125179', 'step': 21606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:47.179711', 'step': 21606, 'epoch': 3} {'type': 'loss', 'content': 0.0672340840101242, 'timestamp': '2025-09-10 03:03:47.182042', 'step': 21607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:47.234820', 'step': 21607, 'epoch': 3} {'type': 'loss', 'content': 0.09703322499990463, 'timestamp': '2025-09-10 03:03:47.240723', 'step': 21608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:47.293345', 'step': 21608, 'epoch': 3} {'type': 'loss', 'content': 0.12529738247394562, 'timestamp': '2025-09-10 03:03:47.295473', 'step': 21609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:47.348627', 'step': 21609, 'epoch': 3} {'type': 'loss', 'content': 0.14058928191661835, 'timestamp': '2025-09-10 03:03:47.350766', 'step': 21610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:47.406184', 'step': 21610, 'epoch': 3} {'type': 'loss', 'content': 0.061304062604904175, 'timestamp': '2025-09-10 03:03:47.408331', 'step': 21611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:47.463777', 'step': 21611, 'epoch': 3} {'type': 'loss', 'content': 0.051279064267873764, 'timestamp': '2025-09-10 03:03:47.469588', 'step': 21612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:47.522478', 'step': 21612, 'epoch': 3} {'type': 'loss', 'content': 0.0893329307436943, 'timestamp': '2025-09-10 03:03:47.524607', 'step': 21613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:47.580063', 'step': 21613, 'epoch': 3} {'type': 'loss', 'content': 0.08091926574707031, 'timestamp': '2025-09-10 03:03:47.582223', 'step': 21614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:47.636353', 'step': 21614, 'epoch': 3} {'type': 'loss', 'content': 0.05757544934749603, 'timestamp': '2025-09-10 03:03:47.638540', 'step': 21615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:47.693210', 'step': 21615, 'epoch': 3} {'type': 'loss', 'content': 0.07506459206342697, 'timestamp': '2025-09-10 03:03:47.699396', 'step': 21616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:47.752323', 'step': 21616, 'epoch': 3} {'type': 'loss', 'content': 0.10116461664438248, 'timestamp': '2025-09-10 03:03:47.754475', 'step': 21617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:47.807750', 'step': 21617, 'epoch': 3} {'type': 'loss', 'content': 0.07399729639291763, 'timestamp': '2025-09-10 03:03:47.810914', 'step': 21618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:47.866270', 'step': 21618, 'epoch': 3} {'type': 'loss', 'content': 0.09627249091863632, 'timestamp': '2025-09-10 03:03:47.868386', 'step': 21619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:47.923635', 'step': 21619, 'epoch': 3} {'type': 'loss', 'content': 0.051253948360681534, 'timestamp': '2025-09-10 03:03:47.929541', 'step': 21620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:47.987935', 'step': 21620, 'epoch': 3} {'type': 'loss', 'content': 0.1439456045627594, 'timestamp': '2025-09-10 03:03:47.990149', 'step': 21621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:48.043000', 'step': 21621, 'epoch': 3} {'type': 'loss', 'content': 0.09574585407972336, 'timestamp': '2025-09-10 03:03:48.045176', 'step': 21622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:48.100056', 'step': 21622, 'epoch': 3} {'type': 'loss', 'content': 0.06383901834487915, 'timestamp': '2025-09-10 03:03:48.102287', 'step': 21623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:48.161168', 'step': 21623, 'epoch': 3} {'type': 'loss', 'content': 0.088824562728405, 'timestamp': '2025-09-10 03:03:48.167059', 'step': 21624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:48.221332', 'step': 21624, 'epoch': 3} {'type': 'loss', 'content': 0.14838796854019165, 'timestamp': '2025-09-10 03:03:48.223652', 'step': 21625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:03:48.277678', 'step': 21625, 'epoch': 3} {'type': 'loss', 'content': 0.0728084146976471, 'timestamp': '2025-09-10 03:03:48.279848', 'step': 21626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:48.335325', 'step': 21626, 'epoch': 3} {'type': 'loss', 'content': 0.13953471183776855, 'timestamp': '2025-09-10 03:03:48.337470', 'step': 21627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:48.392668', 'step': 21627, 'epoch': 3} {'type': 'loss', 'content': 0.08431218564510345, 'timestamp': '2025-09-10 03:03:48.398552', 'step': 21628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:48.451060', 'step': 21628, 'epoch': 3} {'type': 'loss', 'content': 0.013133064843714237, 'timestamp': '2025-09-10 03:03:48.453178', 'step': 21629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:48.505544', 'step': 21629, 'epoch': 3} {'type': 'loss', 'content': 0.08622551709413528, 'timestamp': '2025-09-10 03:03:48.507701', 'step': 21630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:48.560397', 'step': 21630, 'epoch': 3} {'type': 'loss', 'content': 0.20859260857105255, 'timestamp': '2025-09-10 03:03:48.562551', 'step': 21631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:48.616310', 'step': 21631, 'epoch': 3} {'type': 'loss', 'content': 0.0912507101893425, 'timestamp': '2025-09-10 03:03:48.622057', 'step': 21632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:48.674738', 'step': 21632, 'epoch': 3} {'type': 'loss', 'content': 0.03764460235834122, 'timestamp': '2025-09-10 03:03:48.676964', 'step': 21633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:48.730429', 'step': 21633, 'epoch': 3} {'type': 'loss', 'content': 0.08117020130157471, 'timestamp': '2025-09-10 03:03:48.732849', 'step': 21634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:48.787311', 'step': 21634, 'epoch': 3} {'type': 'loss', 'content': 0.09808686375617981, 'timestamp': '2025-09-10 03:03:48.789526', 'step': 21635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:48.843310', 'step': 21635, 'epoch': 3} {'type': 'loss', 'content': 0.11171847581863403, 'timestamp': '2025-09-10 03:03:48.849206', 'step': 21636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 03:03:48.902529', 'step': 21636, 'epoch': 3} {'type': 'loss', 'content': 0.09276247769594193, 'timestamp': '2025-09-10 03:03:48.904692', 'step': 21637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:48.957689', 'step': 21637, 'epoch': 3} {'type': 'loss', 'content': 0.07529682666063309, 'timestamp': '2025-09-10 03:03:48.960013', 'step': 21638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:49.013247', 'step': 21638, 'epoch': 3} {'type': 'loss', 'content': 0.07607541233301163, 'timestamp': '2025-09-10 03:03:49.015275', 'step': 21639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:49.068853', 'step': 21639, 'epoch': 3} {'type': 'loss', 'content': 0.047739505767822266, 'timestamp': '2025-09-10 03:03:49.074587', 'step': 21640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:49.127353', 'step': 21640, 'epoch': 3} {'type': 'loss', 'content': 0.1395840346813202, 'timestamp': '2025-09-10 03:03:49.129384', 'step': 21641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:49.182181', 'step': 21641, 'epoch': 3} {'type': 'loss', 'content': 0.10209736973047256, 'timestamp': '2025-09-10 03:03:49.184259', 'step': 21642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:49.237584', 'step': 21642, 'epoch': 3} {'type': 'loss', 'content': 0.1072540208697319, 'timestamp': '2025-09-10 03:03:49.239723', 'step': 21643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:49.293508', 'step': 21643, 'epoch': 3} {'type': 'loss', 'content': 0.075019970536232, 'timestamp': '2025-09-10 03:03:49.299406', 'step': 21644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:49.352907', 'step': 21644, 'epoch': 3} {'type': 'loss', 'content': 0.047294553369283676, 'timestamp': '2025-09-10 03:03:49.355032', 'step': 21645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:49.407789', 'step': 21645, 'epoch': 3} {'type': 'loss', 'content': 0.03175816312432289, 'timestamp': '2025-09-10 03:03:49.409888', 'step': 21646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:49.462418', 'step': 21646, 'epoch': 3} {'type': 'loss', 'content': 0.06402980536222458, 'timestamp': '2025-09-10 03:03:49.464528', 'step': 21647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:49.518206', 'step': 21647, 'epoch': 3} {'type': 'loss', 'content': 0.019857047125697136, 'timestamp': '2025-09-10 03:03:49.523979', 'step': 21648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:49.576079', 'step': 21648, 'epoch': 3} {'type': 'loss', 'content': 0.10324391722679138, 'timestamp': '2025-09-10 03:03:49.578337', 'step': 21649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:49.633718', 'step': 21649, 'epoch': 3} {'type': 'loss', 'content': 0.1831185519695282, 'timestamp': '2025-09-10 03:03:49.636045', 'step': 21650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:49.688919', 'step': 21650, 'epoch': 3} {'type': 'loss', 'content': 0.1387779414653778, 'timestamp': '2025-09-10 03:03:49.691105', 'step': 21651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:49.744137', 'step': 21651, 'epoch': 3} {'type': 'loss', 'content': 0.10978379845619202, 'timestamp': '2025-09-10 03:03:49.750018', 'step': 21652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:49.804621', 'step': 21652, 'epoch': 3} {'type': 'loss', 'content': 0.050674282014369965, 'timestamp': '2025-09-10 03:03:49.806792', 'step': 21653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:49.860526', 'step': 21653, 'epoch': 3} {'type': 'loss', 'content': 0.04423731192946434, 'timestamp': '2025-09-10 03:03:49.862811', 'step': 21654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:49.916122', 'step': 21654, 'epoch': 3} {'type': 'loss', 'content': 0.028652001172304153, 'timestamp': '2025-09-10 03:03:49.918327', 'step': 21655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:49.971412', 'step': 21655, 'epoch': 3} {'type': 'loss', 'content': 0.09206562489271164, 'timestamp': '2025-09-10 03:03:49.977356', 'step': 21656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:50.030854', 'step': 21656, 'epoch': 3} {'type': 'loss', 'content': 0.0873052179813385, 'timestamp': '2025-09-10 03:03:50.033004', 'step': 21657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:50.088265', 'step': 21657, 'epoch': 3} {'type': 'loss', 'content': 0.1224294900894165, 'timestamp': '2025-09-10 03:03:50.090937', 'step': 21658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:50.143637', 'step': 21658, 'epoch': 3} {'type': 'loss', 'content': 0.09368010610342026, 'timestamp': '2025-09-10 03:03:50.145901', 'step': 21659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:50.199431', 'step': 21659, 'epoch': 3} {'type': 'loss', 'content': 0.10260865837335587, 'timestamp': '2025-09-10 03:03:50.205263', 'step': 21660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:50.258332', 'step': 21660, 'epoch': 3} {'type': 'loss', 'content': 0.09676019847393036, 'timestamp': '2025-09-10 03:03:50.260297', 'step': 21661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:50.314121', 'step': 21661, 'epoch': 3} {'type': 'loss', 'content': 0.04776245728135109, 'timestamp': '2025-09-10 03:03:50.316335', 'step': 21662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:50.370712', 'step': 21662, 'epoch': 3} {'type': 'loss', 'content': 0.08968544751405716, 'timestamp': '2025-09-10 03:03:50.372945', 'step': 21663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:50.428086', 'step': 21663, 'epoch': 3} {'type': 'loss', 'content': 0.06257859617471695, 'timestamp': '2025-09-10 03:03:50.434174', 'step': 21664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:50.487053', 'step': 21664, 'epoch': 3} {'type': 'loss', 'content': 0.06948846578598022, 'timestamp': '2025-09-10 03:03:50.489109', 'step': 21665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:50.541989', 'step': 21665, 'epoch': 3} {'type': 'loss', 'content': 0.09881513565778732, 'timestamp': '2025-09-10 03:03:50.544124', 'step': 21666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:50.597047', 'step': 21666, 'epoch': 3} {'type': 'loss', 'content': 0.10029704123735428, 'timestamp': '2025-09-10 03:03:50.599310', 'step': 21667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:50.651828', 'step': 21667, 'epoch': 3} {'type': 'loss', 'content': 0.0816325694322586, 'timestamp': '2025-09-10 03:03:50.657708', 'step': 21668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:50.710902', 'step': 21668, 'epoch': 3} {'type': 'loss', 'content': 0.03710467740893364, 'timestamp': '2025-09-10 03:03:50.713088', 'step': 21669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:50.766126', 'step': 21669, 'epoch': 3} {'type': 'loss', 'content': 0.11399359256029129, 'timestamp': '2025-09-10 03:03:50.768297', 'step': 21670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:50.822120', 'step': 21670, 'epoch': 3} {'type': 'loss', 'content': 0.04099903255701065, 'timestamp': '2025-09-10 03:03:50.824406', 'step': 21671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:50.879014', 'step': 21671, 'epoch': 3} {'type': 'loss', 'content': 0.11553306877613068, 'timestamp': '2025-09-10 03:03:50.885214', 'step': 21672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:50.940120', 'step': 21672, 'epoch': 3} {'type': 'loss', 'content': 0.11059006303548813, 'timestamp': '2025-09-10 03:03:50.942401', 'step': 21673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:50.999271', 'step': 21673, 'epoch': 3} {'type': 'loss', 'content': 0.03616458922624588, 'timestamp': '2025-09-10 03:03:51.001491', 'step': 21674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:51.056247', 'step': 21674, 'epoch': 3} {'type': 'loss', 'content': 0.08672751486301422, 'timestamp': '2025-09-10 03:03:51.059834', 'step': 21675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:51.114751', 'step': 21675, 'epoch': 3} {'type': 'loss', 'content': 0.1076946035027504, 'timestamp': '2025-09-10 03:03:51.120751', 'step': 21676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:51.173536', 'step': 21676, 'epoch': 3} {'type': 'loss', 'content': 0.09371673315763474, 'timestamp': '2025-09-10 03:03:51.175908', 'step': 21677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:51.229559', 'step': 21677, 'epoch': 3} {'type': 'loss', 'content': 0.018556637689471245, 'timestamp': '2025-09-10 03:03:51.232041', 'step': 21678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:51.286607', 'step': 21678, 'epoch': 3} {'type': 'loss', 'content': 0.07550913095474243, 'timestamp': '2025-09-10 03:03:51.288797', 'step': 21679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:51.343489', 'step': 21679, 'epoch': 3} {'type': 'loss', 'content': 0.055224835872650146, 'timestamp': '2025-09-10 03:03:51.349466', 'step': 21680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:51.406378', 'step': 21680, 'epoch': 3} {'type': 'loss', 'content': 0.12895606458187103, 'timestamp': '2025-09-10 03:03:51.408596', 'step': 21681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:51.467551', 'step': 21681, 'epoch': 3} {'type': 'loss', 'content': 0.0717436671257019, 'timestamp': '2025-09-10 03:03:51.469866', 'step': 21682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:51.525691', 'step': 21682, 'epoch': 3} {'type': 'loss', 'content': 0.05138256028294563, 'timestamp': '2025-09-10 03:03:51.527982', 'step': 21683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:51.585437', 'step': 21683, 'epoch': 3} {'type': 'loss', 'content': 0.042426370084285736, 'timestamp': '2025-09-10 03:03:51.591395', 'step': 21684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:51.650374', 'step': 21684, 'epoch': 3} {'type': 'loss', 'content': 0.15265244245529175, 'timestamp': '2025-09-10 03:03:51.652541', 'step': 21685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:51.706946', 'step': 21685, 'epoch': 3} {'type': 'loss', 'content': 0.11873369663953781, 'timestamp': '2025-09-10 03:03:51.709120', 'step': 21686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:51.762627', 'step': 21686, 'epoch': 3} {'type': 'loss', 'content': 0.08269500732421875, 'timestamp': '2025-09-10 03:03:51.764783', 'step': 21687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:51.817973', 'step': 21687, 'epoch': 3} {'type': 'loss', 'content': 0.1269529014825821, 'timestamp': '2025-09-10 03:03:51.823938', 'step': 21688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:51.876494', 'step': 21688, 'epoch': 3} {'type': 'loss', 'content': 0.023806121200323105, 'timestamp': '2025-09-10 03:03:51.878621', 'step': 21689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:51.932052', 'step': 21689, 'epoch': 3} {'type': 'loss', 'content': 0.14216116070747375, 'timestamp': '2025-09-10 03:03:51.934254', 'step': 21690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:51.988443', 'step': 21690, 'epoch': 3} {'type': 'loss', 'content': 0.06557472050189972, 'timestamp': '2025-09-10 03:03:51.990492', 'step': 21691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:52.043666', 'step': 21691, 'epoch': 3} {'type': 'loss', 'content': 0.11628123372793198, 'timestamp': '2025-09-10 03:03:52.049542', 'step': 21692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:52.102989', 'step': 21692, 'epoch': 3} {'type': 'loss', 'content': 0.04380785673856735, 'timestamp': '2025-09-10 03:03:52.105373', 'step': 21693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:52.157654', 'step': 21693, 'epoch': 3} {'type': 'loss', 'content': 0.0842435359954834, 'timestamp': '2025-09-10 03:03:52.159818', 'step': 21694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:52.212624', 'step': 21694, 'epoch': 3} {'type': 'loss', 'content': 0.03670321777462959, 'timestamp': '2025-09-10 03:03:52.214873', 'step': 21695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:52.267414', 'step': 21695, 'epoch': 3} {'type': 'loss', 'content': 0.0493827648460865, 'timestamp': '2025-09-10 03:03:52.273277', 'step': 21696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:52.325818', 'step': 21696, 'epoch': 3} {'type': 'loss', 'content': 0.05454089492559433, 'timestamp': '2025-09-10 03:03:52.328035', 'step': 21697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:52.381182', 'step': 21697, 'epoch': 3} {'type': 'loss', 'content': 0.09711701422929764, 'timestamp': '2025-09-10 03:03:52.383338', 'step': 21698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:52.436809', 'step': 21698, 'epoch': 3} {'type': 'loss', 'content': 0.04814327135682106, 'timestamp': '2025-09-10 03:03:52.440444', 'step': 21699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:52.496846', 'step': 21699, 'epoch': 3} {'type': 'loss', 'content': 0.09424421191215515, 'timestamp': '2025-09-10 03:03:52.502820', 'step': 21700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:52.557312', 'step': 21700, 'epoch': 3} {'type': 'loss', 'content': 0.023826364427804947, 'timestamp': '2025-09-10 03:03:52.559511', 'step': 21701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:52.612922', 'step': 21701, 'epoch': 3} {'type': 'loss', 'content': 0.053094178438186646, 'timestamp': '2025-09-10 03:03:52.615095', 'step': 21702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:52.668118', 'step': 21702, 'epoch': 3} {'type': 'loss', 'content': 0.13976238667964935, 'timestamp': '2025-09-10 03:03:52.670289', 'step': 21703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:52.724856', 'step': 21703, 'epoch': 3} {'type': 'loss', 'content': 0.0916578397154808, 'timestamp': '2025-09-10 03:03:52.730687', 'step': 21704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:52.784133', 'step': 21704, 'epoch': 3} {'type': 'loss', 'content': 0.08964473009109497, 'timestamp': '2025-09-10 03:03:52.786334', 'step': 21705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:52.839192', 'step': 21705, 'epoch': 3} {'type': 'loss', 'content': 0.04368702694773674, 'timestamp': '2025-09-10 03:03:52.841589', 'step': 21706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:52.894751', 'step': 21706, 'epoch': 3} {'type': 'loss', 'content': 0.13417254388332367, 'timestamp': '2025-09-10 03:03:52.897007', 'step': 21707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:52.950430', 'step': 21707, 'epoch': 3} {'type': 'loss', 'content': 0.06635455787181854, 'timestamp': '2025-09-10 03:03:52.956406', 'step': 21708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:53.020106', 'step': 21708, 'epoch': 3} {'type': 'loss', 'content': 0.06968095898628235, 'timestamp': '2025-09-10 03:03:53.022695', 'step': 21709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:53.077618', 'step': 21709, 'epoch': 3} {'type': 'loss', 'content': 0.06453300267457962, 'timestamp': '2025-09-10 03:03:53.079805', 'step': 21710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:53.134361', 'step': 21710, 'epoch': 3} {'type': 'loss', 'content': 0.0957406759262085, 'timestamp': '2025-09-10 03:03:53.136631', 'step': 21711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:53.189428', 'step': 21711, 'epoch': 3} {'type': 'loss', 'content': 0.05135844275355339, 'timestamp': '2025-09-10 03:03:53.196703', 'step': 21712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:53.250361', 'step': 21712, 'epoch': 3} {'type': 'loss', 'content': 0.06826035678386688, 'timestamp': '2025-09-10 03:03:53.252422', 'step': 21713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:53.305820', 'step': 21713, 'epoch': 3} {'type': 'loss', 'content': 0.07078968733549118, 'timestamp': '2025-09-10 03:03:53.307938', 'step': 21714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:53.360932', 'step': 21714, 'epoch': 3} {'type': 'loss', 'content': 0.030208615586161613, 'timestamp': '2025-09-10 03:03:53.363185', 'step': 21715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:53.416467', 'step': 21715, 'epoch': 3} {'type': 'loss', 'content': 0.08753304183483124, 'timestamp': '2025-09-10 03:03:53.422303', 'step': 21716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:53.474856', 'step': 21716, 'epoch': 3} {'type': 'loss', 'content': 0.11725511401891708, 'timestamp': '2025-09-10 03:03:53.476965', 'step': 21717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:53.530940', 'step': 21717, 'epoch': 3} {'type': 'loss', 'content': 0.13936731219291687, 'timestamp': '2025-09-10 03:03:53.533154', 'step': 21718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:53.587515', 'step': 21718, 'epoch': 3} {'type': 'loss', 'content': 0.0725792869925499, 'timestamp': '2025-09-10 03:03:53.589718', 'step': 21719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:53.644473', 'step': 21719, 'epoch': 3} {'type': 'loss', 'content': 0.12061665207147598, 'timestamp': '2025-09-10 03:03:53.650570', 'step': 21720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:53.704764', 'step': 21720, 'epoch': 3} {'type': 'loss', 'content': 0.10746528953313828, 'timestamp': '2025-09-10 03:03:53.707088', 'step': 21721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:53.760536', 'step': 21721, 'epoch': 3} {'type': 'loss', 'content': 0.15028975903987885, 'timestamp': '2025-09-10 03:03:53.762612', 'step': 21722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:53.816259', 'step': 21722, 'epoch': 3} {'type': 'loss', 'content': 0.17010755836963654, 'timestamp': '2025-09-10 03:03:53.818429', 'step': 21723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:53.872606', 'step': 21723, 'epoch': 3} {'type': 'loss', 'content': 0.05200287327170372, 'timestamp': '2025-09-10 03:03:53.879461', 'step': 21724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:53.934142', 'step': 21724, 'epoch': 3} {'type': 'loss', 'content': 0.09676479548215866, 'timestamp': '2025-09-10 03:03:53.938041', 'step': 21725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:53.993652', 'step': 21725, 'epoch': 3} {'type': 'loss', 'content': 0.08014381676912308, 'timestamp': '2025-09-10 03:03:53.998034', 'step': 21726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:54.054243', 'step': 21726, 'epoch': 3} {'type': 'loss', 'content': 0.036772407591342926, 'timestamp': '2025-09-10 03:03:54.056635', 'step': 21727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:54.125374', 'step': 21727, 'epoch': 3} {'type': 'loss', 'content': 0.05709284916520119, 'timestamp': '2025-09-10 03:03:54.131279', 'step': 21728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:54.194976', 'step': 21728, 'epoch': 3} {'type': 'loss', 'content': 0.0770845115184784, 'timestamp': '2025-09-10 03:03:54.196953', 'step': 21729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:54.249792', 'step': 21729, 'epoch': 3} {'type': 'loss', 'content': 0.06409909576177597, 'timestamp': '2025-09-10 03:03:54.252002', 'step': 21730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:54.305213', 'step': 21730, 'epoch': 3} {'type': 'loss', 'content': 0.06658083945512772, 'timestamp': '2025-09-10 03:03:54.307578', 'step': 21731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:54.361083', 'step': 21731, 'epoch': 3} {'type': 'loss', 'content': 0.09687446802854538, 'timestamp': '2025-09-10 03:03:54.366941', 'step': 21732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:54.422765', 'step': 21732, 'epoch': 3} {'type': 'loss', 'content': 0.01859702356159687, 'timestamp': '2025-09-10 03:03:54.426083', 'step': 21733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:54.483769', 'step': 21733, 'epoch': 3} {'type': 'loss', 'content': 0.04082225635647774, 'timestamp': '2025-09-10 03:03:54.485882', 'step': 21734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:54.540027', 'step': 21734, 'epoch': 3} {'type': 'loss', 'content': 0.05748962238430977, 'timestamp': '2025-09-10 03:03:54.542427', 'step': 21735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:54.596111', 'step': 21735, 'epoch': 3} {'type': 'loss', 'content': 0.05534253269433975, 'timestamp': '2025-09-10 03:03:54.602302', 'step': 21736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:54.655932', 'step': 21736, 'epoch': 3} {'type': 'loss', 'content': 0.055825311690568924, 'timestamp': '2025-09-10 03:03:54.658080', 'step': 21737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:54.713901', 'step': 21737, 'epoch': 3} {'type': 'loss', 'content': 0.07575009763240814, 'timestamp': '2025-09-10 03:03:54.715888', 'step': 21738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:54.779109', 'step': 21738, 'epoch': 3} {'type': 'loss', 'content': 0.09302251785993576, 'timestamp': '2025-09-10 03:03:54.781351', 'step': 21739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:54.835899', 'step': 21739, 'epoch': 3} {'type': 'loss', 'content': 0.07040220499038696, 'timestamp': '2025-09-10 03:03:54.844843', 'step': 21740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:54.897676', 'step': 21740, 'epoch': 3} {'type': 'loss', 'content': 0.07743777334690094, 'timestamp': '2025-09-10 03:03:54.899822', 'step': 21741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:54.952735', 'step': 21741, 'epoch': 3} {'type': 'loss', 'content': 0.076290063560009, 'timestamp': '2025-09-10 03:03:54.954859', 'step': 21742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:55.009169', 'step': 21742, 'epoch': 3} {'type': 'loss', 'content': 0.05713910609483719, 'timestamp': '2025-09-10 03:03:55.011335', 'step': 21743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:55.064676', 'step': 21743, 'epoch': 3} {'type': 'loss', 'content': 0.12683351337909698, 'timestamp': '2025-09-10 03:03:55.070423', 'step': 21744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:55.123464', 'step': 21744, 'epoch': 3} {'type': 'loss', 'content': 0.07444228231906891, 'timestamp': '2025-09-10 03:03:55.125659', 'step': 21745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:55.179005', 'step': 21745, 'epoch': 3} {'type': 'loss', 'content': 0.1298500895500183, 'timestamp': '2025-09-10 03:03:55.181174', 'step': 21746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:55.234944', 'step': 21746, 'epoch': 3} {'type': 'loss', 'content': 0.12566110491752625, 'timestamp': '2025-09-10 03:03:55.237084', 'step': 21747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:55.291905', 'step': 21747, 'epoch': 3} {'type': 'loss', 'content': 0.048701051622629166, 'timestamp': '2025-09-10 03:03:55.297875', 'step': 21748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:55.350648', 'step': 21748, 'epoch': 3} {'type': 'loss', 'content': 0.09430860728025436, 'timestamp': '2025-09-10 03:03:55.353005', 'step': 21749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:55.407513', 'step': 21749, 'epoch': 3} {'type': 'loss', 'content': 0.06791004538536072, 'timestamp': '2025-09-10 03:03:55.409884', 'step': 21750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:55.462920', 'step': 21750, 'epoch': 3} {'type': 'loss', 'content': 0.0894322544336319, 'timestamp': '2025-09-10 03:03:55.465002', 'step': 21751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:55.518204', 'step': 21751, 'epoch': 3} {'type': 'loss', 'content': 0.053331173956394196, 'timestamp': '2025-09-10 03:03:55.524031', 'step': 21752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:55.576720', 'step': 21752, 'epoch': 3} {'type': 'loss', 'content': 0.10857432335615158, 'timestamp': '2025-09-10 03:03:55.578850', 'step': 21753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:55.632840', 'step': 21753, 'epoch': 3} {'type': 'loss', 'content': 0.09295037388801575, 'timestamp': '2025-09-10 03:03:55.635024', 'step': 21754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:55.689104', 'step': 21754, 'epoch': 3} {'type': 'loss', 'content': 0.07125724107027054, 'timestamp': '2025-09-10 03:03:55.691277', 'step': 21755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:55.745620', 'step': 21755, 'epoch': 3} {'type': 'loss', 'content': 0.05862727016210556, 'timestamp': '2025-09-10 03:03:55.751620', 'step': 21756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:55.805579', 'step': 21756, 'epoch': 3} {'type': 'loss', 'content': 0.11236058175563812, 'timestamp': '2025-09-10 03:03:55.807714', 'step': 21757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:55.861415', 'step': 21757, 'epoch': 3} {'type': 'loss', 'content': 0.09274464100599289, 'timestamp': '2025-09-10 03:03:55.863364', 'step': 21758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:55.917943', 'step': 21758, 'epoch': 3} {'type': 'loss', 'content': 0.11188249289989471, 'timestamp': '2025-09-10 03:03:55.920095', 'step': 21759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:55.973059', 'step': 21759, 'epoch': 3} {'type': 'loss', 'content': 0.10219711065292358, 'timestamp': '2025-09-10 03:03:55.978905', 'step': 21760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:56.031853', 'step': 21760, 'epoch': 3} {'type': 'loss', 'content': 0.0649658590555191, 'timestamp': '2025-09-10 03:03:56.034094', 'step': 21761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:56.087123', 'step': 21761, 'epoch': 3} {'type': 'loss', 'content': 0.07625322788953781, 'timestamp': '2025-09-10 03:03:56.089304', 'step': 21762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:56.142718', 'step': 21762, 'epoch': 3} {'type': 'loss', 'content': 0.12398644536733627, 'timestamp': '2025-09-10 03:03:56.145146', 'step': 21763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:56.199653', 'step': 21763, 'epoch': 3} {'type': 'loss', 'content': 0.05335721746087074, 'timestamp': '2025-09-10 03:03:56.205802', 'step': 21764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:56.258855', 'step': 21764, 'epoch': 3} {'type': 'loss', 'content': 0.09079785645008087, 'timestamp': '2025-09-10 03:03:56.261019', 'step': 21765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:56.314361', 'step': 21765, 'epoch': 3} {'type': 'loss', 'content': 0.06720858812332153, 'timestamp': '2025-09-10 03:03:56.316551', 'step': 21766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:56.370614', 'step': 21766, 'epoch': 3} {'type': 'loss', 'content': 0.058540575206279755, 'timestamp': '2025-09-10 03:03:56.372763', 'step': 21767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:56.426792', 'step': 21767, 'epoch': 3} {'type': 'loss', 'content': 0.08269400149583817, 'timestamp': '2025-09-10 03:03:56.432630', 'step': 21768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:56.485623', 'step': 21768, 'epoch': 3} {'type': 'loss', 'content': 0.09936323016881943, 'timestamp': '2025-09-10 03:03:56.487812', 'step': 21769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:56.541638', 'step': 21769, 'epoch': 3} {'type': 'loss', 'content': 0.0422978401184082, 'timestamp': '2025-09-10 03:03:56.543768', 'step': 21770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:56.597321', 'step': 21770, 'epoch': 3} {'type': 'loss', 'content': 0.047465212643146515, 'timestamp': '2025-09-10 03:03:56.599517', 'step': 21771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:56.655527', 'step': 21771, 'epoch': 3} {'type': 'loss', 'content': 0.02442590706050396, 'timestamp': '2025-09-10 03:03:56.661307', 'step': 21772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:56.714800', 'step': 21772, 'epoch': 3} {'type': 'loss', 'content': 0.08276534080505371, 'timestamp': '2025-09-10 03:03:56.717005', 'step': 21773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:56.771379', 'step': 21773, 'epoch': 3} {'type': 'loss', 'content': 0.11486109346151352, 'timestamp': '2025-09-10 03:03:56.773667', 'step': 21774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:56.827379', 'step': 21774, 'epoch': 3} {'type': 'loss', 'content': 0.09138258546590805, 'timestamp': '2025-09-10 03:03:56.829511', 'step': 21775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:56.883005', 'step': 21775, 'epoch': 3} {'type': 'loss', 'content': 0.09878816455602646, 'timestamp': '2025-09-10 03:03:56.888900', 'step': 21776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:56.941775', 'step': 21776, 'epoch': 3} {'type': 'loss', 'content': 0.02407127432525158, 'timestamp': '2025-09-10 03:03:56.943996', 'step': 21777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:56.997798', 'step': 21777, 'epoch': 3} {'type': 'loss', 'content': 0.04426170140504837, 'timestamp': '2025-09-10 03:03:57.000249', 'step': 21778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:57.054780', 'step': 21778, 'epoch': 3} {'type': 'loss', 'content': 0.07903510332107544, 'timestamp': '2025-09-10 03:03:57.057081', 'step': 21779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:57.111620', 'step': 21779, 'epoch': 3} {'type': 'loss', 'content': 0.07060354948043823, 'timestamp': '2025-09-10 03:03:57.117508', 'step': 21780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:57.170509', 'step': 21780, 'epoch': 3} {'type': 'loss', 'content': 0.0832735225558281, 'timestamp': '2025-09-10 03:03:57.172635', 'step': 21781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:57.226119', 'step': 21781, 'epoch': 3} {'type': 'loss', 'content': 0.07055262476205826, 'timestamp': '2025-09-10 03:03:57.228309', 'step': 21782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:57.282734', 'step': 21782, 'epoch': 3} {'type': 'loss', 'content': 0.18656599521636963, 'timestamp': '2025-09-10 03:03:57.284910', 'step': 21783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:57.338302', 'step': 21783, 'epoch': 3} {'type': 'loss', 'content': 0.14871475100517273, 'timestamp': '2025-09-10 03:03:57.344480', 'step': 21784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:57.400687', 'step': 21784, 'epoch': 3} {'type': 'loss', 'content': 0.05555630847811699, 'timestamp': '2025-09-10 03:03:57.403200', 'step': 21785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:57.455913', 'step': 21785, 'epoch': 3} {'type': 'loss', 'content': 0.07684516161680222, 'timestamp': '2025-09-10 03:03:57.457992', 'step': 21786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:57.515011', 'step': 21786, 'epoch': 3} {'type': 'loss', 'content': 0.12066581100225449, 'timestamp': '2025-09-10 03:03:57.516981', 'step': 21787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:57.573594', 'step': 21787, 'epoch': 3} {'type': 'loss', 'content': 0.06543401628732681, 'timestamp': '2025-09-10 03:03:57.579366', 'step': 21788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:57.634539', 'step': 21788, 'epoch': 3} {'type': 'loss', 'content': 0.06823123246431351, 'timestamp': '2025-09-10 03:03:57.636411', 'step': 21789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:57.690923', 'step': 21789, 'epoch': 3} {'type': 'loss', 'content': 0.047002799808979034, 'timestamp': '2025-09-10 03:03:57.693141', 'step': 21790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:57.747344', 'step': 21790, 'epoch': 3} {'type': 'loss', 'content': 0.11418987810611725, 'timestamp': '2025-09-10 03:03:57.749353', 'step': 21791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:57.803899', 'step': 21791, 'epoch': 3} {'type': 'loss', 'content': 0.12100248038768768, 'timestamp': '2025-09-10 03:03:57.810061', 'step': 21792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:57.863198', 'step': 21792, 'epoch': 3} {'type': 'loss', 'content': 0.10522708296775818, 'timestamp': '2025-09-10 03:03:57.865426', 'step': 21793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:57.919630', 'step': 21793, 'epoch': 3} {'type': 'loss', 'content': 0.10299018770456314, 'timestamp': '2025-09-10 03:03:57.921723', 'step': 21794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:57.975969', 'step': 21794, 'epoch': 3} {'type': 'loss', 'content': 0.17393533885478973, 'timestamp': '2025-09-10 03:03:57.978190', 'step': 21795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:58.036974', 'step': 21795, 'epoch': 3} {'type': 'loss', 'content': 0.08955499529838562, 'timestamp': '2025-09-10 03:03:58.042925', 'step': 21796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:58.095644', 'step': 21796, 'epoch': 3} {'type': 'loss', 'content': 0.1107650026679039, 'timestamp': '2025-09-10 03:03:58.097817', 'step': 21797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:58.151919', 'step': 21797, 'epoch': 3} {'type': 'loss', 'content': 0.06788533926010132, 'timestamp': '2025-09-10 03:03:58.154191', 'step': 21798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:58.208521', 'step': 21798, 'epoch': 3} {'type': 'loss', 'content': 0.08663920313119888, 'timestamp': '2025-09-10 03:03:58.210691', 'step': 21799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:58.264348', 'step': 21799, 'epoch': 3} {'type': 'loss', 'content': 0.05052454397082329, 'timestamp': '2025-09-10 03:03:58.270136', 'step': 21800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:58.324072', 'step': 21800, 'epoch': 3} {'type': 'loss', 'content': 0.045654553920030594, 'timestamp': '2025-09-10 03:03:58.326285', 'step': 21801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:58.380823', 'step': 21801, 'epoch': 3} {'type': 'loss', 'content': 0.1746765822172165, 'timestamp': '2025-09-10 03:03:58.383122', 'step': 21802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:58.437918', 'step': 21802, 'epoch': 3} {'type': 'loss', 'content': 0.06868570297956467, 'timestamp': '2025-09-10 03:03:58.440138', 'step': 21803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:58.493541', 'step': 21803, 'epoch': 3} {'type': 'loss', 'content': 0.14161911606788635, 'timestamp': '2025-09-10 03:03:58.499518', 'step': 21804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:58.553212', 'step': 21804, 'epoch': 3} {'type': 'loss', 'content': 0.06346609443426132, 'timestamp': '2025-09-10 03:03:58.555615', 'step': 21805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:58.609138', 'step': 21805, 'epoch': 3} {'type': 'loss', 'content': 0.05814650282263756, 'timestamp': '2025-09-10 03:03:58.611584', 'step': 21806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:58.665227', 'step': 21806, 'epoch': 3} {'type': 'loss', 'content': 0.07867157459259033, 'timestamp': '2025-09-10 03:03:58.667602', 'step': 21807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:03:58.724524', 'step': 21807, 'epoch': 3} {'type': 'loss', 'content': 0.05669135972857475, 'timestamp': '2025-09-10 03:03:58.730607', 'step': 21808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:58.783409', 'step': 21808, 'epoch': 3} {'type': 'loss', 'content': 0.05472283437848091, 'timestamp': '2025-09-10 03:03:58.785669', 'step': 21809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:58.839498', 'step': 21809, 'epoch': 3} {'type': 'loss', 'content': 0.07162042707204819, 'timestamp': '2025-09-10 03:03:58.841916', 'step': 21810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:58.896230', 'step': 21810, 'epoch': 3} {'type': 'loss', 'content': 0.07313287258148193, 'timestamp': '2025-09-10 03:03:58.898497', 'step': 21811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:58.952977', 'step': 21811, 'epoch': 3} {'type': 'loss', 'content': 0.08994198590517044, 'timestamp': '2025-09-10 03:03:58.958966', 'step': 21812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:59.011837', 'step': 21812, 'epoch': 3} {'type': 'loss', 'content': 0.15796427428722382, 'timestamp': '2025-09-10 03:03:59.014079', 'step': 21813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:59.067132', 'step': 21813, 'epoch': 3} {'type': 'loss', 'content': 0.05268725007772446, 'timestamp': '2025-09-10 03:03:59.069508', 'step': 21814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:59.122990', 'step': 21814, 'epoch': 3} {'type': 'loss', 'content': 0.07932568341493607, 'timestamp': '2025-09-10 03:03:59.125291', 'step': 21815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:59.178663', 'step': 21815, 'epoch': 3} {'type': 'loss', 'content': 0.08368086814880371, 'timestamp': '2025-09-10 03:03:59.184592', 'step': 21816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:59.239750', 'step': 21816, 'epoch': 3} {'type': 'loss', 'content': 0.07563348859548569, 'timestamp': '2025-09-10 03:03:59.241988', 'step': 21817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:59.296081', 'step': 21817, 'epoch': 3} {'type': 'loss', 'content': 0.08622035384178162, 'timestamp': '2025-09-10 03:03:59.298303', 'step': 21818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:59.353338', 'step': 21818, 'epoch': 3} {'type': 'loss', 'content': 0.10920429229736328, 'timestamp': '2025-09-10 03:03:59.355601', 'step': 21819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:03:59.410741', 'step': 21819, 'epoch': 3} {'type': 'loss', 'content': 0.09814484417438507, 'timestamp': '2025-09-10 03:03:59.416765', 'step': 21820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:03:59.470289', 'step': 21820, 'epoch': 3} {'type': 'loss', 'content': 0.041303616017103195, 'timestamp': '2025-09-10 03:03:59.472756', 'step': 21821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:59.525987', 'step': 21821, 'epoch': 3} {'type': 'loss', 'content': 0.17502674460411072, 'timestamp': '2025-09-10 03:03:59.528498', 'step': 21822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:59.582758', 'step': 21822, 'epoch': 3} {'type': 'loss', 'content': 0.13220198452472687, 'timestamp': '2025-09-10 03:03:59.585049', 'step': 21823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:59.637841', 'step': 21823, 'epoch': 3} {'type': 'loss', 'content': 0.03907815366983414, 'timestamp': '2025-09-10 03:03:59.643905', 'step': 21824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:59.696980', 'step': 21824, 'epoch': 3} {'type': 'loss', 'content': 0.11995706707239151, 'timestamp': '2025-09-10 03:03:59.699278', 'step': 21825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:59.753837', 'step': 21825, 'epoch': 3} {'type': 'loss', 'content': 0.1364925354719162, 'timestamp': '2025-09-10 03:03:59.756080', 'step': 21826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:03:59.810292', 'step': 21826, 'epoch': 3} {'type': 'loss', 'content': 0.08187177032232285, 'timestamp': '2025-09-10 03:03:59.812543', 'step': 21827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:03:59.866146', 'step': 21827, 'epoch': 3} {'type': 'loss', 'content': 0.05197356268763542, 'timestamp': '2025-09-10 03:03:59.872052', 'step': 21828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:03:59.926003', 'step': 21828, 'epoch': 3} {'type': 'loss', 'content': 0.06898859143257141, 'timestamp': '2025-09-10 03:03:59.928300', 'step': 21829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:03:59.982467', 'step': 21829, 'epoch': 3} {'type': 'loss', 'content': 0.15133793652057648, 'timestamp': '2025-09-10 03:03:59.984775', 'step': 21830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:00.039248', 'step': 21830, 'epoch': 3} {'type': 'loss', 'content': 0.06680774688720703, 'timestamp': '2025-09-10 03:04:00.041511', 'step': 21831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:00.094887', 'step': 21831, 'epoch': 3} {'type': 'loss', 'content': 0.06160213053226471, 'timestamp': '2025-09-10 03:04:00.101055', 'step': 21832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:00.154192', 'step': 21832, 'epoch': 3} {'type': 'loss', 'content': 0.04727500304579735, 'timestamp': '2025-09-10 03:04:00.156364', 'step': 21833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:00.210719', 'step': 21833, 'epoch': 3} {'type': 'loss', 'content': 0.02927342802286148, 'timestamp': '2025-09-10 03:04:00.213045', 'step': 21834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:00.269678', 'step': 21834, 'epoch': 3} {'type': 'loss', 'content': 0.1354503482580185, 'timestamp': '2025-09-10 03:04:00.272091', 'step': 21835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:00.326057', 'step': 21835, 'epoch': 3} {'type': 'loss', 'content': 0.0730758085846901, 'timestamp': '2025-09-10 03:04:00.332430', 'step': 21836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:00.385388', 'step': 21836, 'epoch': 3} {'type': 'loss', 'content': 0.04681937023997307, 'timestamp': '2025-09-10 03:04:00.387664', 'step': 21837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:00.441355', 'step': 21837, 'epoch': 3} {'type': 'loss', 'content': 0.01672891341149807, 'timestamp': '2025-09-10 03:04:00.443716', 'step': 21838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:04:00.498141', 'step': 21838, 'epoch': 3} {'type': 'loss', 'content': 0.1062629371881485, 'timestamp': '2025-09-10 03:04:00.500397', 'step': 21839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:00.554092', 'step': 21839, 'epoch': 3} {'type': 'loss', 'content': 0.046922747045755386, 'timestamp': '2025-09-10 03:04:00.560125', 'step': 21840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:00.613073', 'step': 21840, 'epoch': 3} {'type': 'loss', 'content': 0.0381353460252285, 'timestamp': '2025-09-10 03:04:00.615879', 'step': 21841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:00.669339', 'step': 21841, 'epoch': 3} {'type': 'loss', 'content': 0.10032356530427933, 'timestamp': '2025-09-10 03:04:00.671864', 'step': 21842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:00.725460', 'step': 21842, 'epoch': 3} {'type': 'loss', 'content': 0.07140832394361496, 'timestamp': '2025-09-10 03:04:00.727861', 'step': 21843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:00.781319', 'step': 21843, 'epoch': 3} {'type': 'loss', 'content': 0.08072441816329956, 'timestamp': '2025-09-10 03:04:00.787348', 'step': 21844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:00.839967', 'step': 21844, 'epoch': 3} {'type': 'loss', 'content': 0.06602058559656143, 'timestamp': '2025-09-10 03:04:00.842211', 'step': 21845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:04:00.898120', 'step': 21845, 'epoch': 3} {'type': 'loss', 'content': 0.0549587719142437, 'timestamp': '2025-09-10 03:04:00.900411', 'step': 21846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:00.953998', 'step': 21846, 'epoch': 3} {'type': 'loss', 'content': 0.0724102184176445, 'timestamp': '2025-09-10 03:04:00.956318', 'step': 21847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:01.010335', 'step': 21847, 'epoch': 3} {'type': 'loss', 'content': 0.12142496556043625, 'timestamp': '2025-09-10 03:04:01.016370', 'step': 21848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:01.069943', 'step': 21848, 'epoch': 3} {'type': 'loss', 'content': 0.1130795106291771, 'timestamp': '2025-09-10 03:04:01.072354', 'step': 21849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:01.125252', 'step': 21849, 'epoch': 3} {'type': 'loss', 'content': 0.13320888578891754, 'timestamp': '2025-09-10 03:04:01.127710', 'step': 21850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:01.180963', 'step': 21850, 'epoch': 3} {'type': 'loss', 'content': 0.05189574882388115, 'timestamp': '2025-09-10 03:04:01.183409', 'step': 21851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:01.236675', 'step': 21851, 'epoch': 3} {'type': 'loss', 'content': 0.022852394729852676, 'timestamp': '2025-09-10 03:04:01.242628', 'step': 21852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:01.296491', 'step': 21852, 'epoch': 3} {'type': 'loss', 'content': 0.0425088033080101, 'timestamp': '2025-09-10 03:04:01.298704', 'step': 21853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:01.352732', 'step': 21853, 'epoch': 3} {'type': 'loss', 'content': 0.08915015310049057, 'timestamp': '2025-09-10 03:04:01.354949', 'step': 21854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:01.408660', 'step': 21854, 'epoch': 3} {'type': 'loss', 'content': 0.04334840923547745, 'timestamp': '2025-09-10 03:04:01.410975', 'step': 21855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:01.469021', 'step': 21855, 'epoch': 3} {'type': 'loss', 'content': 0.06661489605903625, 'timestamp': '2025-09-10 03:04:01.474938', 'step': 21856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:01.531790', 'step': 21856, 'epoch': 3} {'type': 'loss', 'content': 0.04731503501534462, 'timestamp': '2025-09-10 03:04:01.534013', 'step': 21857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:01.586987', 'step': 21857, 'epoch': 3} {'type': 'loss', 'content': 0.06393690407276154, 'timestamp': '2025-09-10 03:04:01.589210', 'step': 21858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 03:04:01.643163', 'step': 21858, 'epoch': 3} {'type': 'loss', 'content': 0.043854933232069016, 'timestamp': '2025-09-10 03:04:01.645469', 'step': 21859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:01.699835', 'step': 21859, 'epoch': 3} {'type': 'loss', 'content': 0.060862403362989426, 'timestamp': '2025-09-10 03:04:01.705812', 'step': 21860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:01.758657', 'step': 21860, 'epoch': 3} {'type': 'loss', 'content': 0.11008157581090927, 'timestamp': '2025-09-10 03:04:01.760959', 'step': 21861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:01.813619', 'step': 21861, 'epoch': 3} {'type': 'loss', 'content': 0.06752391904592514, 'timestamp': '2025-09-10 03:04:01.815820', 'step': 21862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:01.869907', 'step': 21862, 'epoch': 3} {'type': 'loss', 'content': 0.09069874882698059, 'timestamp': '2025-09-10 03:04:01.872113', 'step': 21863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:01.927856', 'step': 21863, 'epoch': 3} {'type': 'loss', 'content': 0.06078840047121048, 'timestamp': '2025-09-10 03:04:01.933760', 'step': 21864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:01.987226', 'step': 21864, 'epoch': 3} {'type': 'loss', 'content': 0.04548400640487671, 'timestamp': '2025-09-10 03:04:01.989662', 'step': 21865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:02.042763', 'step': 21865, 'epoch': 3} {'type': 'loss', 'content': 0.07127542793750763, 'timestamp': '2025-09-10 03:04:02.045071', 'step': 21866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:02.099937', 'step': 21866, 'epoch': 3} {'type': 'loss', 'content': 0.06204067915678024, 'timestamp': '2025-09-10 03:04:02.102160', 'step': 21867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:02.156217', 'step': 21867, 'epoch': 3} {'type': 'loss', 'content': 0.0263885036110878, 'timestamp': '2025-09-10 03:04:02.162386', 'step': 21868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:02.215612', 'step': 21868, 'epoch': 3} {'type': 'loss', 'content': 0.09198452532291412, 'timestamp': '2025-09-10 03:04:02.217911', 'step': 21869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:04:02.271139', 'step': 21869, 'epoch': 3} {'type': 'loss', 'content': 0.14182820916175842, 'timestamp': '2025-09-10 03:04:02.273472', 'step': 21870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:02.327071', 'step': 21870, 'epoch': 3} {'type': 'loss', 'content': 0.05547717213630676, 'timestamp': '2025-09-10 03:04:02.329328', 'step': 21871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:02.383247', 'step': 21871, 'epoch': 3} {'type': 'loss', 'content': 0.05622139200568199, 'timestamp': '2025-09-10 03:04:02.389227', 'step': 21872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:02.465664', 'step': 21872, 'epoch': 3} {'type': 'loss', 'content': 0.1110270693898201, 'timestamp': '2025-09-10 03:04:02.467756', 'step': 21873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:02.521532', 'step': 21873, 'epoch': 3} {'type': 'loss', 'content': 0.03283000364899635, 'timestamp': '2025-09-10 03:04:02.523522', 'step': 21874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:02.579246', 'step': 21874, 'epoch': 3} {'type': 'loss', 'content': 0.16351856291294098, 'timestamp': '2025-09-10 03:04:02.581495', 'step': 21875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:02.635917', 'step': 21875, 'epoch': 3} {'type': 'loss', 'content': 0.048771388828754425, 'timestamp': '2025-09-10 03:04:02.642074', 'step': 21876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:02.695482', 'step': 21876, 'epoch': 3} {'type': 'loss', 'content': 0.1625782549381256, 'timestamp': '2025-09-10 03:04:02.697587', 'step': 21877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:02.751578', 'step': 21877, 'epoch': 3} {'type': 'loss', 'content': 0.06418740004301071, 'timestamp': '2025-09-10 03:04:02.754069', 'step': 21878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:02.808027', 'step': 21878, 'epoch': 3} {'type': 'loss', 'content': 0.1055113896727562, 'timestamp': '2025-09-10 03:04:02.810542', 'step': 21879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:02.866681', 'step': 21879, 'epoch': 3} {'type': 'loss', 'content': 0.09067720919847488, 'timestamp': '2025-09-10 03:04:02.872584', 'step': 21880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:02.925105', 'step': 21880, 'epoch': 3} {'type': 'loss', 'content': 0.10053758323192596, 'timestamp': '2025-09-10 03:04:02.927304', 'step': 21881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:02.980883', 'step': 21881, 'epoch': 3} {'type': 'loss', 'content': 0.09722181409597397, 'timestamp': '2025-09-10 03:04:02.983298', 'step': 21882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:03.048665', 'step': 21882, 'epoch': 3} {'type': 'loss', 'content': 0.0497804693877697, 'timestamp': '2025-09-10 03:04:03.050898', 'step': 21883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:03.105422', 'step': 21883, 'epoch': 3} {'type': 'loss', 'content': 0.028549611568450928, 'timestamp': '2025-09-10 03:04:03.111379', 'step': 21884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:03.165314', 'step': 21884, 'epoch': 3} {'type': 'loss', 'content': 0.09041127562522888, 'timestamp': '2025-09-10 03:04:03.167694', 'step': 21885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:03.221250', 'step': 21885, 'epoch': 3} {'type': 'loss', 'content': 0.03980047255754471, 'timestamp': '2025-09-10 03:04:03.223540', 'step': 21886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:03.278714', 'step': 21886, 'epoch': 3} {'type': 'loss', 'content': 0.036293406039476395, 'timestamp': '2025-09-10 03:04:03.281144', 'step': 21887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:03.335754', 'step': 21887, 'epoch': 3} {'type': 'loss', 'content': 0.12795913219451904, 'timestamp': '2025-09-10 03:04:03.341857', 'step': 21888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:03.395795', 'step': 21888, 'epoch': 3} {'type': 'loss', 'content': 0.02013503573834896, 'timestamp': '2025-09-10 03:04:03.398047', 'step': 21889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:03.451324', 'step': 21889, 'epoch': 3} {'type': 'loss', 'content': 0.07461993396282196, 'timestamp': '2025-09-10 03:04:03.453572', 'step': 21890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:03.507244', 'step': 21890, 'epoch': 3} {'type': 'loss', 'content': 0.0862160325050354, 'timestamp': '2025-09-10 03:04:03.509507', 'step': 21891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:03.563083', 'step': 21891, 'epoch': 3} {'type': 'loss', 'content': 0.06119740009307861, 'timestamp': '2025-09-10 03:04:03.569044', 'step': 21892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:03.622203', 'step': 21892, 'epoch': 3} {'type': 'loss', 'content': 0.14414283633232117, 'timestamp': '2025-09-10 03:04:03.624667', 'step': 21893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:03.679284', 'step': 21893, 'epoch': 3} {'type': 'loss', 'content': 0.08034677058458328, 'timestamp': '2025-09-10 03:04:03.681834', 'step': 21894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:03.736812', 'step': 21894, 'epoch': 3} {'type': 'loss', 'content': 0.09312456846237183, 'timestamp': '2025-09-10 03:04:03.739018', 'step': 21895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:03.792827', 'step': 21895, 'epoch': 3} {'type': 'loss', 'content': 0.07483051717281342, 'timestamp': '2025-09-10 03:04:03.799004', 'step': 21896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:03.851958', 'step': 21896, 'epoch': 3} {'type': 'loss', 'content': 0.0976157858967781, 'timestamp': '2025-09-10 03:04:03.854160', 'step': 21897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:03.907582', 'step': 21897, 'epoch': 3} {'type': 'loss', 'content': 0.1817779541015625, 'timestamp': '2025-09-10 03:04:03.909806', 'step': 21898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:03.962932', 'step': 21898, 'epoch': 3} {'type': 'loss', 'content': 0.06622859090566635, 'timestamp': '2025-09-10 03:04:03.965201', 'step': 21899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:04.019342', 'step': 21899, 'epoch': 3} {'type': 'loss', 'content': 0.10982123762369156, 'timestamp': '2025-09-10 03:04:04.025276', 'step': 21900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:04.078724', 'step': 21900, 'epoch': 3} {'type': 'loss', 'content': 0.08406637609004974, 'timestamp': '2025-09-10 03:04:04.081019', 'step': 21901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:04.133939', 'step': 21901, 'epoch': 3} {'type': 'loss', 'content': 0.032713793218135834, 'timestamp': '2025-09-10 03:04:04.136137', 'step': 21902, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 03:04:16.984606', 'step': 21902, 'epoch': 3} {'type': 'pplx', 'content': 9971.277635342054, 'timestamp': '2025-09-10 03:04:16.987871', 'step': 21902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:17.042695', 'step': 21902, 'epoch': 3} {'type': 'loss', 'content': 0.13890722393989563, 'timestamp': '2025-09-10 03:04:17.044931', 'step': 21903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:17.103907', 'step': 21903, 'epoch': 3} {'type': 'loss', 'content': 0.03779376298189163, 'timestamp': '2025-09-10 03:04:17.110155', 'step': 21904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:17.169227', 'step': 21904, 'epoch': 3} {'type': 'loss', 'content': 0.07267310470342636, 'timestamp': '2025-09-10 03:04:17.171559', 'step': 21905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:17.226333', 'step': 21905, 'epoch': 3} {'type': 'loss', 'content': 0.09620853513479233, 'timestamp': '2025-09-10 03:04:17.228656', 'step': 21906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:17.285204', 'step': 21906, 'epoch': 3} {'type': 'loss', 'content': 0.11549925059080124, 'timestamp': '2025-09-10 03:04:17.287436', 'step': 21907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:17.348428', 'step': 21907, 'epoch': 3} {'type': 'loss', 'content': 0.059265412390232086, 'timestamp': '2025-09-10 03:04:17.354627', 'step': 21908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:17.409486', 'step': 21908, 'epoch': 3} {'type': 'loss', 'content': 0.05114651098847389, 'timestamp': '2025-09-10 03:04:17.411791', 'step': 21909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:17.472717', 'step': 21909, 'epoch': 3} {'type': 'loss', 'content': 0.09858698397874832, 'timestamp': '2025-09-10 03:04:17.475016', 'step': 21910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:17.528410', 'step': 21910, 'epoch': 3} {'type': 'loss', 'content': 0.12256590276956558, 'timestamp': '2025-09-10 03:04:17.530585', 'step': 21911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:17.584023', 'step': 21911, 'epoch': 3} {'type': 'loss', 'content': 0.08802660554647446, 'timestamp': '2025-09-10 03:04:17.589916', 'step': 21912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:17.643217', 'step': 21912, 'epoch': 3} {'type': 'loss', 'content': 0.1273624449968338, 'timestamp': '2025-09-10 03:04:17.645869', 'step': 21913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:17.699715', 'step': 21913, 'epoch': 3} {'type': 'loss', 'content': 0.06038571894168854, 'timestamp': '2025-09-10 03:04:17.701942', 'step': 21914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:17.756187', 'step': 21914, 'epoch': 3} {'type': 'loss', 'content': 0.18496567010879517, 'timestamp': '2025-09-10 03:04:17.758469', 'step': 21915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:17.813619', 'step': 21915, 'epoch': 3} {'type': 'loss', 'content': 0.07118752598762512, 'timestamp': '2025-09-10 03:04:17.821030', 'step': 21916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:17.878335', 'step': 21916, 'epoch': 3} {'type': 'loss', 'content': 0.05824488401412964, 'timestamp': '2025-09-10 03:04:17.880514', 'step': 21917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:17.934343', 'step': 21917, 'epoch': 3} {'type': 'loss', 'content': 0.1438860297203064, 'timestamp': '2025-09-10 03:04:17.936668', 'step': 21918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:17.993640', 'step': 21918, 'epoch': 3} {'type': 'loss', 'content': 0.08083467930555344, 'timestamp': '2025-09-10 03:04:17.995936', 'step': 21919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:18.049554', 'step': 21919, 'epoch': 3} {'type': 'loss', 'content': 0.09957339614629745, 'timestamp': '2025-09-10 03:04:18.055534', 'step': 21920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:18.108512', 'step': 21920, 'epoch': 3} {'type': 'loss', 'content': 0.01264598686248064, 'timestamp': '2025-09-10 03:04:18.110786', 'step': 21921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:18.167283', 'step': 21921, 'epoch': 3} {'type': 'loss', 'content': 0.08775381743907928, 'timestamp': '2025-09-10 03:04:18.169541', 'step': 21922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:18.227507', 'step': 21922, 'epoch': 3} {'type': 'loss', 'content': 0.12211675941944122, 'timestamp': '2025-09-10 03:04:18.229794', 'step': 21923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:18.299797', 'step': 21923, 'epoch': 3} {'type': 'loss', 'content': 0.08481607586145401, 'timestamp': '2025-09-10 03:04:18.305821', 'step': 21924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:18.360614', 'step': 21924, 'epoch': 3} {'type': 'loss', 'content': 0.08901924639940262, 'timestamp': '2025-09-10 03:04:18.362893', 'step': 21925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:18.417595', 'step': 21925, 'epoch': 3} {'type': 'loss', 'content': 0.057904209941625595, 'timestamp': '2025-09-10 03:04:18.419815', 'step': 21926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:18.473228', 'step': 21926, 'epoch': 3} {'type': 'loss', 'content': 0.05584549903869629, 'timestamp': '2025-09-10 03:04:18.475394', 'step': 21927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:18.528890', 'step': 21927, 'epoch': 3} {'type': 'loss', 'content': 0.051857031881809235, 'timestamp': '2025-09-10 03:04:18.534811', 'step': 21928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:18.591392', 'step': 21928, 'epoch': 3} {'type': 'loss', 'content': 0.15493233501911163, 'timestamp': '2025-09-10 03:04:18.593604', 'step': 21929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:18.646797', 'step': 21929, 'epoch': 3} {'type': 'loss', 'content': 0.04898862540721893, 'timestamp': '2025-09-10 03:04:18.649035', 'step': 21930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:18.703069', 'step': 21930, 'epoch': 3} {'type': 'loss', 'content': 0.06180929020047188, 'timestamp': '2025-09-10 03:04:18.705577', 'step': 21931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:18.758872', 'step': 21931, 'epoch': 3} {'type': 'loss', 'content': 0.10899414867162704, 'timestamp': '2025-09-10 03:04:18.764744', 'step': 21932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:18.817697', 'step': 21932, 'epoch': 3} {'type': 'loss', 'content': 0.11103793978691101, 'timestamp': '2025-09-10 03:04:18.819749', 'step': 21933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:18.873670', 'step': 21933, 'epoch': 3} {'type': 'loss', 'content': 0.10963182151317596, 'timestamp': '2025-09-10 03:04:18.876012', 'step': 21934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:18.934383', 'step': 21934, 'epoch': 3} {'type': 'loss', 'content': 0.07794533669948578, 'timestamp': '2025-09-10 03:04:18.938028', 'step': 21935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:18.991628', 'step': 21935, 'epoch': 3} {'type': 'loss', 'content': 0.1460094451904297, 'timestamp': '2025-09-10 03:04:18.997655', 'step': 21936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:19.050831', 'step': 21936, 'epoch': 3} {'type': 'loss', 'content': 0.138031005859375, 'timestamp': '2025-09-10 03:04:19.053095', 'step': 21937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:19.106843', 'step': 21937, 'epoch': 3} {'type': 'loss', 'content': 0.11478088796138763, 'timestamp': '2025-09-10 03:04:19.109188', 'step': 21938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:19.162273', 'step': 21938, 'epoch': 3} {'type': 'loss', 'content': 0.08852057158946991, 'timestamp': '2025-09-10 03:04:19.164317', 'step': 21939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:19.218288', 'step': 21939, 'epoch': 3} {'type': 'loss', 'content': 0.04868479445576668, 'timestamp': '2025-09-10 03:04:19.224211', 'step': 21940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:19.276959', 'step': 21940, 'epoch': 3} {'type': 'loss', 'content': 0.10947076231241226, 'timestamp': '2025-09-10 03:04:19.279173', 'step': 21941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:19.332749', 'step': 21941, 'epoch': 3} {'type': 'loss', 'content': 0.08082281798124313, 'timestamp': '2025-09-10 03:04:19.335081', 'step': 21942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:19.390746', 'step': 21942, 'epoch': 3} {'type': 'loss', 'content': 0.07006951421499252, 'timestamp': '2025-09-10 03:04:19.393146', 'step': 21943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:19.446877', 'step': 21943, 'epoch': 3} {'type': 'loss', 'content': 0.030293293297290802, 'timestamp': '2025-09-10 03:04:19.453042', 'step': 21944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:19.506219', 'step': 21944, 'epoch': 3} {'type': 'loss', 'content': 0.09073353558778763, 'timestamp': '2025-09-10 03:04:19.508554', 'step': 21945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:19.562182', 'step': 21945, 'epoch': 3} {'type': 'loss', 'content': 0.1047825887799263, 'timestamp': '2025-09-10 03:04:19.564592', 'step': 21946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:19.618944', 'step': 21946, 'epoch': 3} {'type': 'loss', 'content': 0.08053553849458694, 'timestamp': '2025-09-10 03:04:19.621338', 'step': 21947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:19.674543', 'step': 21947, 'epoch': 3} {'type': 'loss', 'content': 0.05506317317485809, 'timestamp': '2025-09-10 03:04:19.680414', 'step': 21948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:04:19.733552', 'step': 21948, 'epoch': 3} {'type': 'loss', 'content': 0.09391151368618011, 'timestamp': '2025-09-10 03:04:19.735751', 'step': 21949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:19.789846', 'step': 21949, 'epoch': 3} {'type': 'loss', 'content': 0.07382673025131226, 'timestamp': '2025-09-10 03:04:19.792109', 'step': 21950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:19.845695', 'step': 21950, 'epoch': 3} {'type': 'loss', 'content': 0.0497351810336113, 'timestamp': '2025-09-10 03:04:19.847929', 'step': 21951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:19.901915', 'step': 21951, 'epoch': 3} {'type': 'loss', 'content': 0.0309671089053154, 'timestamp': '2025-09-10 03:04:19.907987', 'step': 21952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:19.960673', 'step': 21952, 'epoch': 3} {'type': 'loss', 'content': 0.10820456594228745, 'timestamp': '2025-09-10 03:04:19.962814', 'step': 21953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:20.015735', 'step': 21953, 'epoch': 3} {'type': 'loss', 'content': 0.11184410750865936, 'timestamp': '2025-09-10 03:04:20.017930', 'step': 21954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:20.071054', 'step': 21954, 'epoch': 3} {'type': 'loss', 'content': 0.08108925074338913, 'timestamp': '2025-09-10 03:04:20.073210', 'step': 21955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:20.125852', 'step': 21955, 'epoch': 3} {'type': 'loss', 'content': 0.05255233868956566, 'timestamp': '2025-09-10 03:04:20.131810', 'step': 21956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:04:20.185325', 'step': 21956, 'epoch': 3} {'type': 'loss', 'content': 0.08562939614057541, 'timestamp': '2025-09-10 03:04:20.187641', 'step': 21957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:20.240789', 'step': 21957, 'epoch': 3} {'type': 'loss', 'content': 0.04975977912545204, 'timestamp': '2025-09-10 03:04:20.243050', 'step': 21958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:20.298009', 'step': 21958, 'epoch': 3} {'type': 'loss', 'content': 0.06903622299432755, 'timestamp': '2025-09-10 03:04:20.300436', 'step': 21959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:20.354792', 'step': 21959, 'epoch': 3} {'type': 'loss', 'content': 0.07795730978250504, 'timestamp': '2025-09-10 03:04:20.360826', 'step': 21960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:20.415527', 'step': 21960, 'epoch': 3} {'type': 'loss', 'content': 0.06757479161024094, 'timestamp': '2025-09-10 03:04:20.417895', 'step': 21961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:20.471804', 'step': 21961, 'epoch': 3} {'type': 'loss', 'content': 0.10851941257715225, 'timestamp': '2025-09-10 03:04:20.474022', 'step': 21962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:20.528054', 'step': 21962, 'epoch': 3} {'type': 'loss', 'content': 0.07600759714841843, 'timestamp': '2025-09-10 03:04:20.530305', 'step': 21963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:20.584140', 'step': 21963, 'epoch': 3} {'type': 'loss', 'content': 0.07647326588630676, 'timestamp': '2025-09-10 03:04:20.590247', 'step': 21964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:20.644365', 'step': 21964, 'epoch': 3} {'type': 'loss', 'content': 0.063027024269104, 'timestamp': '2025-09-10 03:04:20.646583', 'step': 21965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:20.701785', 'step': 21965, 'epoch': 3} {'type': 'loss', 'content': 0.043323319405317307, 'timestamp': '2025-09-10 03:04:20.704068', 'step': 21966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:20.757825', 'step': 21966, 'epoch': 3} {'type': 'loss', 'content': 0.10798845440149307, 'timestamp': '2025-09-10 03:04:20.760099', 'step': 21967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:20.813620', 'step': 21967, 'epoch': 3} {'type': 'loss', 'content': 0.07327407598495483, 'timestamp': '2025-09-10 03:04:20.819493', 'step': 21968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:20.872073', 'step': 21968, 'epoch': 3} {'type': 'loss', 'content': 0.07892503589391708, 'timestamp': '2025-09-10 03:04:20.874331', 'step': 21969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:20.927867', 'step': 21969, 'epoch': 3} {'type': 'loss', 'content': 0.035740841180086136, 'timestamp': '2025-09-10 03:04:20.930110', 'step': 21970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:20.985310', 'step': 21970, 'epoch': 3} {'type': 'loss', 'content': 0.11644157767295837, 'timestamp': '2025-09-10 03:04:20.987682', 'step': 21971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:21.041760', 'step': 21971, 'epoch': 3} {'type': 'loss', 'content': 0.13826709985733032, 'timestamp': '2025-09-10 03:04:21.047778', 'step': 21972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:21.107976', 'step': 21972, 'epoch': 3} {'type': 'loss', 'content': 0.1010063961148262, 'timestamp': '2025-09-10 03:04:21.110407', 'step': 21973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:21.163684', 'step': 21973, 'epoch': 3} {'type': 'loss', 'content': 0.06877795606851578, 'timestamp': '2025-09-10 03:04:21.166091', 'step': 21974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:21.221829', 'step': 21974, 'epoch': 3} {'type': 'loss', 'content': 0.1027805358171463, 'timestamp': '2025-09-10 03:04:21.224334', 'step': 21975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:21.277843', 'step': 21975, 'epoch': 3} {'type': 'loss', 'content': 0.03276348114013672, 'timestamp': '2025-09-10 03:04:21.283860', 'step': 21976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:21.336899', 'step': 21976, 'epoch': 3} {'type': 'loss', 'content': 0.14874787628650665, 'timestamp': '2025-09-10 03:04:21.339121', 'step': 21977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:21.392256', 'step': 21977, 'epoch': 3} {'type': 'loss', 'content': 0.14612804353237152, 'timestamp': '2025-09-10 03:04:21.394595', 'step': 21978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:04:21.448152', 'step': 21978, 'epoch': 3} {'type': 'loss', 'content': 0.18210457265377045, 'timestamp': '2025-09-10 03:04:21.450448', 'step': 21979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:04:21.504155', 'step': 21979, 'epoch': 3} {'type': 'loss', 'content': 0.19249361753463745, 'timestamp': '2025-09-10 03:04:21.510404', 'step': 21980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:21.564413', 'step': 21980, 'epoch': 3} {'type': 'loss', 'content': 0.06768511235713959, 'timestamp': '2025-09-10 03:04:21.566672', 'step': 21981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:21.619997', 'step': 21981, 'epoch': 3} {'type': 'loss', 'content': 0.1477992832660675, 'timestamp': '2025-09-10 03:04:21.622130', 'step': 21982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:21.675763', 'step': 21982, 'epoch': 3} {'type': 'loss', 'content': 0.14381156861782074, 'timestamp': '2025-09-10 03:04:21.677798', 'step': 21983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:21.732283', 'step': 21983, 'epoch': 3} {'type': 'loss', 'content': 0.09371870756149292, 'timestamp': '2025-09-10 03:04:21.738201', 'step': 21984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:21.791724', 'step': 21984, 'epoch': 3} {'type': 'loss', 'content': 0.0541054829955101, 'timestamp': '2025-09-10 03:04:21.793933', 'step': 21985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:21.847313', 'step': 21985, 'epoch': 3} {'type': 'loss', 'content': 0.16397224366664886, 'timestamp': '2025-09-10 03:04:21.849495', 'step': 21986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:04:21.903154', 'step': 21986, 'epoch': 3} {'type': 'loss', 'content': 0.13031938672065735, 'timestamp': '2025-09-10 03:04:21.905516', 'step': 21987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:21.959217', 'step': 21987, 'epoch': 3} {'type': 'loss', 'content': 0.018562132492661476, 'timestamp': '2025-09-10 03:04:21.965301', 'step': 21988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:22.018318', 'step': 21988, 'epoch': 3} {'type': 'loss', 'content': 0.07445541024208069, 'timestamp': '2025-09-10 03:04:22.020563', 'step': 21989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:22.076030', 'step': 21989, 'epoch': 3} {'type': 'loss', 'content': 0.07120788097381592, 'timestamp': '2025-09-10 03:04:22.078326', 'step': 21990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:22.133161', 'step': 21990, 'epoch': 3} {'type': 'loss', 'content': 0.06875193864107132, 'timestamp': '2025-09-10 03:04:22.135406', 'step': 21991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:22.189248', 'step': 21991, 'epoch': 3} {'type': 'loss', 'content': 0.07641510665416718, 'timestamp': '2025-09-10 03:04:22.195309', 'step': 21992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:22.248607', 'step': 21992, 'epoch': 3} {'type': 'loss', 'content': 0.037032995373010635, 'timestamp': '2025-09-10 03:04:22.250883', 'step': 21993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:22.304664', 'step': 21993, 'epoch': 3} {'type': 'loss', 'content': 0.07638191431760788, 'timestamp': '2025-09-10 03:04:22.306879', 'step': 21994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:22.360672', 'step': 21994, 'epoch': 3} {'type': 'loss', 'content': 0.06853396445512772, 'timestamp': '2025-09-10 03:04:22.362984', 'step': 21995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:22.416415', 'step': 21995, 'epoch': 3} {'type': 'loss', 'content': 0.07817916572093964, 'timestamp': '2025-09-10 03:04:22.422394', 'step': 21996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:22.475486', 'step': 21996, 'epoch': 3} {'type': 'loss', 'content': 0.11560635268688202, 'timestamp': '2025-09-10 03:04:22.477811', 'step': 21997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:04:22.531673', 'step': 21997, 'epoch': 3} {'type': 'loss', 'content': 0.13001200556755066, 'timestamp': '2025-09-10 03:04:22.533904', 'step': 21998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:22.588487', 'step': 21998, 'epoch': 3} {'type': 'loss', 'content': 0.04731006547808647, 'timestamp': '2025-09-10 03:04:22.590723', 'step': 21999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:22.645694', 'step': 21999, 'epoch': 3} {'type': 'loss', 'content': 0.05051556974649429, 'timestamp': '2025-09-10 03:04:22.653249', 'step': 22000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 22000', 'timestamp': '2025-09-10 03:04:22.991248', 'step': 22000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:23.047841', 'step': 22000, 'epoch': 3} {'type': 'loss', 'content': 0.11635104566812515, 'timestamp': '2025-09-10 03:04:23.052059', 'step': 22001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:23.114552', 'step': 22001, 'epoch': 3} {'type': 'loss', 'content': 0.05505254119634628, 'timestamp': '2025-09-10 03:04:23.117329', 'step': 22002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:23.172391', 'step': 22002, 'epoch': 3} {'type': 'loss', 'content': 0.08202370256185532, 'timestamp': '2025-09-10 03:04:23.174646', 'step': 22003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:23.228393', 'step': 22003, 'epoch': 3} {'type': 'loss', 'content': 0.09706021845340729, 'timestamp': '2025-09-10 03:04:23.234623', 'step': 22004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:23.287610', 'step': 22004, 'epoch': 3} {'type': 'loss', 'content': 0.12197797745466232, 'timestamp': '2025-09-10 03:04:23.289804', 'step': 22005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:23.343227', 'step': 22005, 'epoch': 3} {'type': 'loss', 'content': 0.0992802083492279, 'timestamp': '2025-09-10 03:04:23.345336', 'step': 22006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:23.398939', 'step': 22006, 'epoch': 3} {'type': 'loss', 'content': 0.14115622639656067, 'timestamp': '2025-09-10 03:04:23.401249', 'step': 22007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:23.454781', 'step': 22007, 'epoch': 3} {'type': 'loss', 'content': 0.09026701748371124, 'timestamp': '2025-09-10 03:04:23.460791', 'step': 22008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:23.514959', 'step': 22008, 'epoch': 3} {'type': 'loss', 'content': 0.06673388928174973, 'timestamp': '2025-09-10 03:04:23.517173', 'step': 22009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:23.570124', 'step': 22009, 'epoch': 3} {'type': 'loss', 'content': 0.007528145797550678, 'timestamp': '2025-09-10 03:04:23.572321', 'step': 22010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:23.626997', 'step': 22010, 'epoch': 3} {'type': 'loss', 'content': 0.0877542644739151, 'timestamp': '2025-09-10 03:04:23.629297', 'step': 22011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:23.683480', 'step': 22011, 'epoch': 3} {'type': 'loss', 'content': 0.04079708084464073, 'timestamp': '2025-09-10 03:04:23.689627', 'step': 22012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:04:23.742660', 'step': 22012, 'epoch': 3} {'type': 'loss', 'content': 0.058819159865379333, 'timestamp': '2025-09-10 03:04:23.744933', 'step': 22013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:23.799879', 'step': 22013, 'epoch': 3} {'type': 'loss', 'content': 0.030993958935141563, 'timestamp': '2025-09-10 03:04:23.802211', 'step': 22014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:23.856507', 'step': 22014, 'epoch': 3} {'type': 'loss', 'content': 0.157364621758461, 'timestamp': '2025-09-10 03:04:23.858836', 'step': 22015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:23.912436', 'step': 22015, 'epoch': 3} {'type': 'loss', 'content': 0.0861140787601471, 'timestamp': '2025-09-10 03:04:23.918482', 'step': 22016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:23.971722', 'step': 22016, 'epoch': 3} {'type': 'loss', 'content': 0.07592958211898804, 'timestamp': '2025-09-10 03:04:23.974150', 'step': 22017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:24.026780', 'step': 22017, 'epoch': 3} {'type': 'loss', 'content': 0.07490358501672745, 'timestamp': '2025-09-10 03:04:24.028930', 'step': 22018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:24.082684', 'step': 22018, 'epoch': 3} {'type': 'loss', 'content': 0.04030362144112587, 'timestamp': '2025-09-10 03:04:24.084948', 'step': 22019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:24.137718', 'step': 22019, 'epoch': 3} {'type': 'loss', 'content': 0.049048714339733124, 'timestamp': '2025-09-10 03:04:24.143691', 'step': 22020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:24.197365', 'step': 22020, 'epoch': 3} {'type': 'loss', 'content': 0.09556175023317337, 'timestamp': '2025-09-10 03:04:24.199423', 'step': 22021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:24.253279', 'step': 22021, 'epoch': 3} {'type': 'loss', 'content': 0.1531578153371811, 'timestamp': '2025-09-10 03:04:24.255520', 'step': 22022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:24.309287', 'step': 22022, 'epoch': 3} {'type': 'loss', 'content': 0.08396195620298386, 'timestamp': '2025-09-10 03:04:24.311527', 'step': 22023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:04:24.364392', 'step': 22023, 'epoch': 3} {'type': 'loss', 'content': 0.14631244540214539, 'timestamp': '2025-09-10 03:04:24.370313', 'step': 22024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:24.422617', 'step': 22024, 'epoch': 3} {'type': 'loss', 'content': 0.06281353533267975, 'timestamp': '2025-09-10 03:04:24.424847', 'step': 22025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:24.478324', 'step': 22025, 'epoch': 3} {'type': 'loss', 'content': 0.07628625631332397, 'timestamp': '2025-09-10 03:04:24.480465', 'step': 22026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:24.535885', 'step': 22026, 'epoch': 3} {'type': 'loss', 'content': 0.05470128357410431, 'timestamp': '2025-09-10 03:04:24.538286', 'step': 22027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:04:24.591804', 'step': 22027, 'epoch': 3} {'type': 'loss', 'content': 0.03606070205569267, 'timestamp': '2025-09-10 03:04:24.597707', 'step': 22028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:24.651873', 'step': 22028, 'epoch': 3} {'type': 'loss', 'content': 0.03959537297487259, 'timestamp': '2025-09-10 03:04:24.653986', 'step': 22029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:24.710248', 'step': 22029, 'epoch': 3} {'type': 'loss', 'content': 0.06496939063072205, 'timestamp': '2025-09-10 03:04:24.712807', 'step': 22030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:24.768652', 'step': 22030, 'epoch': 3} {'type': 'loss', 'content': 0.08769110590219498, 'timestamp': '2025-09-10 03:04:24.771033', 'step': 22031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:24.824448', 'step': 22031, 'epoch': 3} {'type': 'loss', 'content': 0.14126087725162506, 'timestamp': '2025-09-10 03:04:24.830461', 'step': 22032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:24.883847', 'step': 22032, 'epoch': 3} {'type': 'loss', 'content': 0.08888405561447144, 'timestamp': '2025-09-10 03:04:24.886142', 'step': 22033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:24.939675', 'step': 22033, 'epoch': 3} {'type': 'loss', 'content': 0.040469489991664886, 'timestamp': '2025-09-10 03:04:24.941935', 'step': 22034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:25.000377', 'step': 22034, 'epoch': 3} {'type': 'loss', 'content': 0.1131591945886612, 'timestamp': '2025-09-10 03:04:25.002733', 'step': 22035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:25.056963', 'step': 22035, 'epoch': 3} {'type': 'loss', 'content': 0.04180009290575981, 'timestamp': '2025-09-10 03:04:25.062960', 'step': 22036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:25.126919', 'step': 22036, 'epoch': 3} {'type': 'loss', 'content': 0.08560702949762344, 'timestamp': '2025-09-10 03:04:25.129288', 'step': 22037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:25.183741', 'step': 22037, 'epoch': 3} {'type': 'loss', 'content': 0.16516020894050598, 'timestamp': '2025-09-10 03:04:25.186072', 'step': 22038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:25.240607', 'step': 22038, 'epoch': 3} {'type': 'loss', 'content': 0.03504697605967522, 'timestamp': '2025-09-10 03:04:25.243018', 'step': 22039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:25.297419', 'step': 22039, 'epoch': 3} {'type': 'loss', 'content': 0.08399395644664764, 'timestamp': '2025-09-10 03:04:25.303622', 'step': 22040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:25.357138', 'step': 22040, 'epoch': 3} {'type': 'loss', 'content': 0.09175551682710648, 'timestamp': '2025-09-10 03:04:25.359486', 'step': 22041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:25.413095', 'step': 22041, 'epoch': 3} {'type': 'loss', 'content': 0.05091485753655434, 'timestamp': '2025-09-10 03:04:25.415496', 'step': 22042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:25.468861', 'step': 22042, 'epoch': 3} {'type': 'loss', 'content': 0.057691507041454315, 'timestamp': '2025-09-10 03:04:25.471152', 'step': 22043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:25.524735', 'step': 22043, 'epoch': 3} {'type': 'loss', 'content': 0.04802887141704559, 'timestamp': '2025-09-10 03:04:25.530910', 'step': 22044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:25.583813', 'step': 22044, 'epoch': 3} {'type': 'loss', 'content': 0.046879738569259644, 'timestamp': '2025-09-10 03:04:25.586315', 'step': 22045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:25.643006', 'step': 22045, 'epoch': 3} {'type': 'loss', 'content': 0.056417107582092285, 'timestamp': '2025-09-10 03:04:25.645423', 'step': 22046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:25.698813', 'step': 22046, 'epoch': 3} {'type': 'loss', 'content': 0.08460072427988052, 'timestamp': '2025-09-10 03:04:25.701176', 'step': 22047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:25.754416', 'step': 22047, 'epoch': 3} {'type': 'loss', 'content': 0.1280287802219391, 'timestamp': '2025-09-10 03:04:25.760421', 'step': 22048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:25.817172', 'step': 22048, 'epoch': 3} {'type': 'loss', 'content': 0.04395527020096779, 'timestamp': '2025-09-10 03:04:25.819447', 'step': 22049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:25.874102', 'step': 22049, 'epoch': 3} {'type': 'loss', 'content': 0.15575429797172546, 'timestamp': '2025-09-10 03:04:25.876541', 'step': 22050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:25.929773', 'step': 22050, 'epoch': 3} {'type': 'loss', 'content': 0.07023914903402328, 'timestamp': '2025-09-10 03:04:25.932119', 'step': 22051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:25.986290', 'step': 22051, 'epoch': 3} {'type': 'loss', 'content': 0.056465186178684235, 'timestamp': '2025-09-10 03:04:25.992393', 'step': 22052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:26.045382', 'step': 22052, 'epoch': 3} {'type': 'loss', 'content': 0.09979671984910965, 'timestamp': '2025-09-10 03:04:26.047700', 'step': 22053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:26.101146', 'step': 22053, 'epoch': 3} {'type': 'loss', 'content': 0.0364067368209362, 'timestamp': '2025-09-10 03:04:26.104681', 'step': 22054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:26.158062', 'step': 22054, 'epoch': 3} {'type': 'loss', 'content': 0.09046132117509842, 'timestamp': '2025-09-10 03:04:26.160320', 'step': 22055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:26.213495', 'step': 22055, 'epoch': 3} {'type': 'loss', 'content': 0.08293161541223526, 'timestamp': '2025-09-10 03:04:26.219404', 'step': 22056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:26.272336', 'step': 22056, 'epoch': 3} {'type': 'loss', 'content': 0.033222462981939316, 'timestamp': '2025-09-10 03:04:26.274599', 'step': 22057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:26.328402', 'step': 22057, 'epoch': 3} {'type': 'loss', 'content': 0.04063371196389198, 'timestamp': '2025-09-10 03:04:26.330846', 'step': 22058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:04:26.384206', 'step': 22058, 'epoch': 3} {'type': 'loss', 'content': 0.03787387162446976, 'timestamp': '2025-09-10 03:04:26.386574', 'step': 22059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:26.439990', 'step': 22059, 'epoch': 3} {'type': 'loss', 'content': 0.07107339054346085, 'timestamp': '2025-09-10 03:04:26.446244', 'step': 22060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:26.499156', 'step': 22060, 'epoch': 3} {'type': 'loss', 'content': 0.044718217104673386, 'timestamp': '2025-09-10 03:04:26.501387', 'step': 22061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:26.554971', 'step': 22061, 'epoch': 3} {'type': 'loss', 'content': 0.03565512225031853, 'timestamp': '2025-09-10 03:04:26.557303', 'step': 22062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:26.610361', 'step': 22062, 'epoch': 3} {'type': 'loss', 'content': 0.08168508857488632, 'timestamp': '2025-09-10 03:04:26.612818', 'step': 22063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:26.666073', 'step': 22063, 'epoch': 3} {'type': 'loss', 'content': 0.09768626093864441, 'timestamp': '2025-09-10 03:04:26.671799', 'step': 22064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:26.725978', 'step': 22064, 'epoch': 3} {'type': 'loss', 'content': 0.013328880071640015, 'timestamp': '2025-09-10 03:04:26.728216', 'step': 22065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:26.781301', 'step': 22065, 'epoch': 3} {'type': 'loss', 'content': 0.09041360765695572, 'timestamp': '2025-09-10 03:04:26.783577', 'step': 22066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:26.836649', 'step': 22066, 'epoch': 3} {'type': 'loss', 'content': 0.04732203111052513, 'timestamp': '2025-09-10 03:04:26.842417', 'step': 22067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:26.904993', 'step': 22067, 'epoch': 3} {'type': 'loss', 'content': 0.07349541038274765, 'timestamp': '2025-09-10 03:04:26.911027', 'step': 22068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:26.964236', 'step': 22068, 'epoch': 3} {'type': 'loss', 'content': 0.1483982801437378, 'timestamp': '2025-09-10 03:04:26.966458', 'step': 22069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:27.019919', 'step': 22069, 'epoch': 3} {'type': 'loss', 'content': 0.09970010817050934, 'timestamp': '2025-09-10 03:04:27.022139', 'step': 22070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:27.076056', 'step': 22070, 'epoch': 3} {'type': 'loss', 'content': 0.07758570462465286, 'timestamp': '2025-09-10 03:04:27.078331', 'step': 22071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:27.133697', 'step': 22071, 'epoch': 3} {'type': 'loss', 'content': 0.06829310953617096, 'timestamp': '2025-09-10 03:04:27.139782', 'step': 22072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:27.193260', 'step': 22072, 'epoch': 3} {'type': 'loss', 'content': 0.049270473420619965, 'timestamp': '2025-09-10 03:04:27.195642', 'step': 22073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:27.248677', 'step': 22073, 'epoch': 3} {'type': 'loss', 'content': 0.08172079920768738, 'timestamp': '2025-09-10 03:04:27.251107', 'step': 22074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:27.305671', 'step': 22074, 'epoch': 3} {'type': 'loss', 'content': 0.06659511476755142, 'timestamp': '2025-09-10 03:04:27.313771', 'step': 22075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:27.376075', 'step': 22075, 'epoch': 3} {'type': 'loss', 'content': 0.12066343426704407, 'timestamp': '2025-09-10 03:04:27.382336', 'step': 22076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:27.437023', 'step': 22076, 'epoch': 3} {'type': 'loss', 'content': 0.06018102169036865, 'timestamp': '2025-09-10 03:04:27.439407', 'step': 22077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 03:04:27.493763', 'step': 22077, 'epoch': 3} {'type': 'loss', 'content': 0.08564537763595581, 'timestamp': '2025-09-10 03:04:27.496150', 'step': 22078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:04:27.549493', 'step': 22078, 'epoch': 3} {'type': 'loss', 'content': 0.1770632266998291, 'timestamp': '2025-09-10 03:04:27.551738', 'step': 22079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:27.606234', 'step': 22079, 'epoch': 3} {'type': 'loss', 'content': 0.041377756744623184, 'timestamp': '2025-09-10 03:04:27.612395', 'step': 22080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:27.670311', 'step': 22080, 'epoch': 3} {'type': 'loss', 'content': 0.17646054923534393, 'timestamp': '2025-09-10 03:04:27.672549', 'step': 22081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:27.736812', 'step': 22081, 'epoch': 3} {'type': 'loss', 'content': 0.12176081538200378, 'timestamp': '2025-09-10 03:04:27.738801', 'step': 22082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:27.791911', 'step': 22082, 'epoch': 3} {'type': 'loss', 'content': 0.09869558364152908, 'timestamp': '2025-09-10 03:04:27.794130', 'step': 22083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:27.848941', 'step': 22083, 'epoch': 3} {'type': 'loss', 'content': 0.10907402634620667, 'timestamp': '2025-09-10 03:04:27.854880', 'step': 22084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:27.907998', 'step': 22084, 'epoch': 3} {'type': 'loss', 'content': 0.09352084249258041, 'timestamp': '2025-09-10 03:04:27.910202', 'step': 22085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:27.964610', 'step': 22085, 'epoch': 3} {'type': 'loss', 'content': 0.04401780292391777, 'timestamp': '2025-09-10 03:04:27.966862', 'step': 22086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:28.021933', 'step': 22086, 'epoch': 3} {'type': 'loss', 'content': 0.1863379180431366, 'timestamp': '2025-09-10 03:04:28.024366', 'step': 22087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:04:28.078198', 'step': 22087, 'epoch': 3} {'type': 'loss', 'content': 0.051434118300676346, 'timestamp': '2025-09-10 03:04:28.084359', 'step': 22088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:28.137018', 'step': 22088, 'epoch': 3} {'type': 'loss', 'content': 0.10131610184907913, 'timestamp': '2025-09-10 03:04:28.139333', 'step': 22089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:04:28.194792', 'step': 22089, 'epoch': 3} {'type': 'loss', 'content': 0.06277499347925186, 'timestamp': '2025-09-10 03:04:28.197016', 'step': 22090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:28.250207', 'step': 22090, 'epoch': 3} {'type': 'loss', 'content': 0.07438547164201736, 'timestamp': '2025-09-10 03:04:28.252475', 'step': 22091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:28.310847', 'step': 22091, 'epoch': 3} {'type': 'loss', 'content': 0.07747547328472137, 'timestamp': '2025-09-10 03:04:28.316839', 'step': 22092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:28.369661', 'step': 22092, 'epoch': 3} {'type': 'loss', 'content': 0.052049972116947174, 'timestamp': '2025-09-10 03:04:28.371841', 'step': 22093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:28.424752', 'step': 22093, 'epoch': 3} {'type': 'loss', 'content': 0.08095066994428635, 'timestamp': '2025-09-10 03:04:28.426810', 'step': 22094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:28.484063', 'step': 22094, 'epoch': 3} {'type': 'loss', 'content': 0.060892049223184586, 'timestamp': '2025-09-10 03:04:28.486302', 'step': 22095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:28.539439', 'step': 22095, 'epoch': 3} {'type': 'loss', 'content': 0.10371416062116623, 'timestamp': '2025-09-10 03:04:28.545485', 'step': 22096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:28.598025', 'step': 22096, 'epoch': 3} {'type': 'loss', 'content': 0.12813051044940948, 'timestamp': '2025-09-10 03:04:28.600366', 'step': 22097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:28.653358', 'step': 22097, 'epoch': 3} {'type': 'loss', 'content': 0.05445188656449318, 'timestamp': '2025-09-10 03:04:28.655589', 'step': 22098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:28.709081', 'step': 22098, 'epoch': 3} {'type': 'loss', 'content': 0.06033913046121597, 'timestamp': '2025-09-10 03:04:28.711474', 'step': 22099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:28.764558', 'step': 22099, 'epoch': 3} {'type': 'loss', 'content': 0.09141019731760025, 'timestamp': '2025-09-10 03:04:28.771117', 'step': 22100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:28.825004', 'step': 22100, 'epoch': 3} {'type': 'loss', 'content': 0.14277897775173187, 'timestamp': '2025-09-10 03:04:28.827373', 'step': 22101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:28.881047', 'step': 22101, 'epoch': 3} {'type': 'loss', 'content': 0.08734183758497238, 'timestamp': '2025-09-10 03:04:28.883421', 'step': 22102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:28.936938', 'step': 22102, 'epoch': 3} {'type': 'loss', 'content': 0.08325423300266266, 'timestamp': '2025-09-10 03:04:28.939363', 'step': 22103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:28.993393', 'step': 22103, 'epoch': 3} {'type': 'loss', 'content': 0.14328821003437042, 'timestamp': '2025-09-10 03:04:28.999506', 'step': 22104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:29.052814', 'step': 22104, 'epoch': 3} {'type': 'loss', 'content': 0.1446475088596344, 'timestamp': '2025-09-10 03:04:29.055007', 'step': 22105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:29.108593', 'step': 22105, 'epoch': 3} {'type': 'loss', 'content': 0.04728502407670021, 'timestamp': '2025-09-10 03:04:29.110830', 'step': 22106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:29.164526', 'step': 22106, 'epoch': 3} {'type': 'loss', 'content': 0.12198473513126373, 'timestamp': '2025-09-10 03:04:29.166774', 'step': 22107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:29.219847', 'step': 22107, 'epoch': 3} {'type': 'loss', 'content': 0.10108675807714462, 'timestamp': '2025-09-10 03:04:29.225647', 'step': 22108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:29.279178', 'step': 22108, 'epoch': 3} {'type': 'loss', 'content': 0.0925632044672966, 'timestamp': '2025-09-10 03:04:29.281402', 'step': 22109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:29.334572', 'step': 22109, 'epoch': 3} {'type': 'loss', 'content': 0.06731554865837097, 'timestamp': '2025-09-10 03:04:29.336776', 'step': 22110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:29.390606', 'step': 22110, 'epoch': 3} {'type': 'loss', 'content': 0.1264837384223938, 'timestamp': '2025-09-10 03:04:29.392860', 'step': 22111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:29.445849', 'step': 22111, 'epoch': 3} {'type': 'loss', 'content': 0.1115080863237381, 'timestamp': '2025-09-10 03:04:29.451679', 'step': 22112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:29.504713', 'step': 22112, 'epoch': 3} {'type': 'loss', 'content': 0.11369335651397705, 'timestamp': '2025-09-10 03:04:29.506837', 'step': 22113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:29.561031', 'step': 22113, 'epoch': 3} {'type': 'loss', 'content': 0.11281178891658783, 'timestamp': '2025-09-10 03:04:29.563154', 'step': 22114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:29.617247', 'step': 22114, 'epoch': 3} {'type': 'loss', 'content': 0.10125976800918579, 'timestamp': '2025-09-10 03:04:29.619431', 'step': 22115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:04:29.673251', 'step': 22115, 'epoch': 3} {'type': 'loss', 'content': 0.058321792632341385, 'timestamp': '2025-09-10 03:04:29.679621', 'step': 22116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:29.732966', 'step': 22116, 'epoch': 3} {'type': 'loss', 'content': 0.08105327188968658, 'timestamp': '2025-09-10 03:04:29.735543', 'step': 22117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:29.788677', 'step': 22117, 'epoch': 3} {'type': 'loss', 'content': 0.028206059709191322, 'timestamp': '2025-09-10 03:04:29.790896', 'step': 22118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:29.845787', 'step': 22118, 'epoch': 3} {'type': 'loss', 'content': 0.07654406875371933, 'timestamp': '2025-09-10 03:04:29.847986', 'step': 22119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:29.901055', 'step': 22119, 'epoch': 3} {'type': 'loss', 'content': 0.058520685881376266, 'timestamp': '2025-09-10 03:04:29.907046', 'step': 22120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:29.959928', 'step': 22120, 'epoch': 3} {'type': 'loss', 'content': 0.05187947675585747, 'timestamp': '2025-09-10 03:04:29.962193', 'step': 22121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:30.015527', 'step': 22121, 'epoch': 3} {'type': 'loss', 'content': 0.09912721067667007, 'timestamp': '2025-09-10 03:04:30.017754', 'step': 22122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:30.071825', 'step': 22122, 'epoch': 3} {'type': 'loss', 'content': 0.18821418285369873, 'timestamp': '2025-09-10 03:04:30.074204', 'step': 22123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:30.137169', 'step': 22123, 'epoch': 3} {'type': 'loss', 'content': 0.09675964713096619, 'timestamp': '2025-09-10 03:04:30.143285', 'step': 22124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:30.196177', 'step': 22124, 'epoch': 3} {'type': 'loss', 'content': 0.09318265318870544, 'timestamp': '2025-09-10 03:04:30.198373', 'step': 22125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:30.251743', 'step': 22125, 'epoch': 3} {'type': 'loss', 'content': 0.08702553063631058, 'timestamp': '2025-09-10 03:04:30.254088', 'step': 22126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:30.307426', 'step': 22126, 'epoch': 3} {'type': 'loss', 'content': 0.16625940799713135, 'timestamp': '2025-09-10 03:04:30.309652', 'step': 22127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:30.364856', 'step': 22127, 'epoch': 3} {'type': 'loss', 'content': 0.11614970117807388, 'timestamp': '2025-09-10 03:04:30.370775', 'step': 22128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:30.423087', 'step': 22128, 'epoch': 3} {'type': 'loss', 'content': 0.061695799231529236, 'timestamp': '2025-09-10 03:04:30.425324', 'step': 22129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:04:30.479186', 'step': 22129, 'epoch': 3} {'type': 'loss', 'content': 0.10104071348905563, 'timestamp': '2025-09-10 03:04:30.481565', 'step': 22130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:30.535296', 'step': 22130, 'epoch': 3} {'type': 'loss', 'content': 0.04762435704469681, 'timestamp': '2025-09-10 03:04:30.537656', 'step': 22131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:30.591273', 'step': 22131, 'epoch': 3} {'type': 'loss', 'content': 0.07763142138719559, 'timestamp': '2025-09-10 03:04:30.597571', 'step': 22132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:30.651695', 'step': 22132, 'epoch': 3} {'type': 'loss', 'content': 0.07232318818569183, 'timestamp': '2025-09-10 03:04:30.654454', 'step': 22133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:30.707960', 'step': 22133, 'epoch': 3} {'type': 'loss', 'content': 0.10365794599056244, 'timestamp': '2025-09-10 03:04:30.710484', 'step': 22134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:30.764811', 'step': 22134, 'epoch': 3} {'type': 'loss', 'content': 0.05278155207633972, 'timestamp': '2025-09-10 03:04:30.767099', 'step': 22135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:30.820706', 'step': 22135, 'epoch': 3} {'type': 'loss', 'content': 0.058526892215013504, 'timestamp': '2025-09-10 03:04:30.826539', 'step': 22136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:30.879490', 'step': 22136, 'epoch': 3} {'type': 'loss', 'content': 0.07983510196208954, 'timestamp': '2025-09-10 03:04:30.881635', 'step': 22137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:30.934689', 'step': 22137, 'epoch': 3} {'type': 'loss', 'content': 0.035194460302591324, 'timestamp': '2025-09-10 03:04:30.936892', 'step': 22138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:30.990300', 'step': 22138, 'epoch': 3} {'type': 'loss', 'content': 0.04942799359560013, 'timestamp': '2025-09-10 03:04:30.992296', 'step': 22139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:04:31.046396', 'step': 22139, 'epoch': 3} {'type': 'loss', 'content': 0.10553768277168274, 'timestamp': '2025-09-10 03:04:31.052290', 'step': 22140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:31.104958', 'step': 22140, 'epoch': 3} {'type': 'loss', 'content': 0.06809786707162857, 'timestamp': '2025-09-10 03:04:31.106970', 'step': 22141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:31.160794', 'step': 22141, 'epoch': 3} {'type': 'loss', 'content': 0.08616233617067337, 'timestamp': '2025-09-10 03:04:31.163047', 'step': 22142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:31.217117', 'step': 22142, 'epoch': 3} {'type': 'loss', 'content': 0.06733685731887817, 'timestamp': '2025-09-10 03:04:31.219410', 'step': 22143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:04:31.272907', 'step': 22143, 'epoch': 3} {'type': 'loss', 'content': 0.07794318348169327, 'timestamp': '2025-09-10 03:04:31.279226', 'step': 22144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:31.332094', 'step': 22144, 'epoch': 3} {'type': 'loss', 'content': 0.09725356101989746, 'timestamp': '2025-09-10 03:04:31.334447', 'step': 22145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:31.394877', 'step': 22145, 'epoch': 3} {'type': 'loss', 'content': 0.12280752509832382, 'timestamp': '2025-09-10 03:04:31.397262', 'step': 22146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:31.451595', 'step': 22146, 'epoch': 3} {'type': 'loss', 'content': 0.07916576415300369, 'timestamp': '2025-09-10 03:04:31.453846', 'step': 22147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:31.507170', 'step': 22147, 'epoch': 3} {'type': 'loss', 'content': 0.044237807393074036, 'timestamp': '2025-09-10 03:04:31.513115', 'step': 22148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:04:31.565657', 'step': 22148, 'epoch': 3} {'type': 'loss', 'content': 0.04928450286388397, 'timestamp': '2025-09-10 03:04:31.567867', 'step': 22149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:31.620593', 'step': 22149, 'epoch': 3} {'type': 'loss', 'content': 0.04717273637652397, 'timestamp': '2025-09-10 03:04:31.622789', 'step': 22150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:31.676804', 'step': 22150, 'epoch': 3} {'type': 'loss', 'content': 0.10637002438306808, 'timestamp': '2025-09-10 03:04:31.679016', 'step': 22151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:31.732541', 'step': 22151, 'epoch': 3} {'type': 'loss', 'content': 0.05897145718336105, 'timestamp': '2025-09-10 03:04:31.738570', 'step': 22152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:31.791112', 'step': 22152, 'epoch': 3} {'type': 'loss', 'content': 0.030344605445861816, 'timestamp': '2025-09-10 03:04:31.793460', 'step': 22153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:31.846692', 'step': 22153, 'epoch': 3} {'type': 'loss', 'content': 0.05958759784698486, 'timestamp': '2025-09-10 03:04:31.848932', 'step': 22154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:31.902637', 'step': 22154, 'epoch': 3} {'type': 'loss', 'content': 0.025000179186463356, 'timestamp': '2025-09-10 03:04:31.904891', 'step': 22155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:31.958329', 'step': 22155, 'epoch': 3} {'type': 'loss', 'content': 0.05855101719498634, 'timestamp': '2025-09-10 03:04:31.964266', 'step': 22156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:32.017195', 'step': 22156, 'epoch': 3} {'type': 'loss', 'content': 0.06380590796470642, 'timestamp': '2025-09-10 03:04:32.019313', 'step': 22157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:32.072572', 'step': 22157, 'epoch': 3} {'type': 'loss', 'content': 0.02815878391265869, 'timestamp': '2025-09-10 03:04:32.074581', 'step': 22158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:32.127698', 'step': 22158, 'epoch': 3} {'type': 'loss', 'content': 0.11399912089109421, 'timestamp': '2025-09-10 03:04:32.129914', 'step': 22159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:32.184234', 'step': 22159, 'epoch': 3} {'type': 'loss', 'content': 0.11105555295944214, 'timestamp': '2025-09-10 03:04:32.190464', 'step': 22160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:32.245662', 'step': 22160, 'epoch': 3} {'type': 'loss', 'content': 0.0819346159696579, 'timestamp': '2025-09-10 03:04:32.247944', 'step': 22161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:32.303049', 'step': 22161, 'epoch': 3} {'type': 'loss', 'content': 0.05291247367858887, 'timestamp': '2025-09-10 03:04:32.305415', 'step': 22162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:32.358034', 'step': 22162, 'epoch': 3} {'type': 'loss', 'content': 0.14473199844360352, 'timestamp': '2025-09-10 03:04:32.361492', 'step': 22163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:32.414907', 'step': 22163, 'epoch': 3} {'type': 'loss', 'content': 0.05269812047481537, 'timestamp': '2025-09-10 03:04:32.421156', 'step': 22164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:32.474776', 'step': 22164, 'epoch': 3} {'type': 'loss', 'content': 0.11742807179689407, 'timestamp': '2025-09-10 03:04:32.477011', 'step': 22165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:32.548503', 'step': 22165, 'epoch': 3} {'type': 'loss', 'content': 0.09238623827695847, 'timestamp': '2025-09-10 03:04:32.551051', 'step': 22166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:32.609456', 'step': 22166, 'epoch': 3} {'type': 'loss', 'content': 0.07515928894281387, 'timestamp': '2025-09-10 03:04:32.611636', 'step': 22167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:32.669790', 'step': 22167, 'epoch': 3} {'type': 'loss', 'content': 0.11939507722854614, 'timestamp': '2025-09-10 03:04:32.675892', 'step': 22168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:04:32.732128', 'step': 22168, 'epoch': 3} {'type': 'loss', 'content': 0.05037557706236839, 'timestamp': '2025-09-10 03:04:32.734409', 'step': 22169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:32.788874', 'step': 22169, 'epoch': 3} {'type': 'loss', 'content': 0.08296360820531845, 'timestamp': '2025-09-10 03:04:32.791193', 'step': 22170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:32.850665', 'step': 22170, 'epoch': 3} {'type': 'loss', 'content': 0.04101848602294922, 'timestamp': '2025-09-10 03:04:32.853113', 'step': 22171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:32.907116', 'step': 22171, 'epoch': 3} {'type': 'loss', 'content': 0.0667533278465271, 'timestamp': '2025-09-10 03:04:32.913168', 'step': 22172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:32.966229', 'step': 22172, 'epoch': 3} {'type': 'loss', 'content': 0.1190357431769371, 'timestamp': '2025-09-10 03:04:32.968657', 'step': 22173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:04:33.023387', 'step': 22173, 'epoch': 3} {'type': 'loss', 'content': 0.09588614851236343, 'timestamp': '2025-09-10 03:04:33.031371', 'step': 22174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:33.090288', 'step': 22174, 'epoch': 3} {'type': 'loss', 'content': 0.05377238988876343, 'timestamp': '2025-09-10 03:04:33.092577', 'step': 22175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:33.148929', 'step': 22175, 'epoch': 3} {'type': 'loss', 'content': 0.12248321622610092, 'timestamp': '2025-09-10 03:04:33.154909', 'step': 22176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:33.207989', 'step': 22176, 'epoch': 3} {'type': 'loss', 'content': 0.0355561189353466, 'timestamp': '2025-09-10 03:04:33.210197', 'step': 22177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:33.264917', 'step': 22177, 'epoch': 3} {'type': 'loss', 'content': 0.09391987323760986, 'timestamp': '2025-09-10 03:04:33.267180', 'step': 22178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:33.334275', 'step': 22178, 'epoch': 3} {'type': 'loss', 'content': 0.11512096971273422, 'timestamp': '2025-09-10 03:04:33.340026', 'step': 22179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:33.397829', 'step': 22179, 'epoch': 3} {'type': 'loss', 'content': 0.16033172607421875, 'timestamp': '2025-09-10 03:04:33.403925', 'step': 22180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:33.457065', 'step': 22180, 'epoch': 3} {'type': 'loss', 'content': 0.13437026739120483, 'timestamp': '2025-09-10 03:04:33.459409', 'step': 22181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:33.512481', 'step': 22181, 'epoch': 3} {'type': 'loss', 'content': 0.036419909447431564, 'timestamp': '2025-09-10 03:04:33.514708', 'step': 22182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:33.568239', 'step': 22182, 'epoch': 3} {'type': 'loss', 'content': 0.023155653849244118, 'timestamp': '2025-09-10 03:04:33.570519', 'step': 22183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:33.623653', 'step': 22183, 'epoch': 3} {'type': 'loss', 'content': 0.07228951156139374, 'timestamp': '2025-09-10 03:04:33.629704', 'step': 22184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:33.682495', 'step': 22184, 'epoch': 3} {'type': 'loss', 'content': 0.1502191573381424, 'timestamp': '2025-09-10 03:04:33.684684', 'step': 22185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:33.738199', 'step': 22185, 'epoch': 3} {'type': 'loss', 'content': 0.10573110729455948, 'timestamp': '2025-09-10 03:04:33.740468', 'step': 22186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:33.794331', 'step': 22186, 'epoch': 3} {'type': 'loss', 'content': 0.018822696059942245, 'timestamp': '2025-09-10 03:04:33.796716', 'step': 22187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:33.850884', 'step': 22187, 'epoch': 3} {'type': 'loss', 'content': 0.06576524674892426, 'timestamp': '2025-09-10 03:04:33.857166', 'step': 22188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:33.911327', 'step': 22188, 'epoch': 3} {'type': 'loss', 'content': 0.07898205518722534, 'timestamp': '2025-09-10 03:04:33.913849', 'step': 22189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:33.967036', 'step': 22189, 'epoch': 3} {'type': 'loss', 'content': 0.045752424746751785, 'timestamp': '2025-09-10 03:04:33.969307', 'step': 22190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:34.023407', 'step': 22190, 'epoch': 3} {'type': 'loss', 'content': 0.10886713862419128, 'timestamp': '2025-09-10 03:04:34.025453', 'step': 22191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:34.079505', 'step': 22191, 'epoch': 3} {'type': 'loss', 'content': 0.08287382870912552, 'timestamp': '2025-09-10 03:04:34.085604', 'step': 22192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:34.138840', 'step': 22192, 'epoch': 3} {'type': 'loss', 'content': 0.09165215492248535, 'timestamp': '2025-09-10 03:04:34.141042', 'step': 22193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:34.193973', 'step': 22193, 'epoch': 3} {'type': 'loss', 'content': 0.07548609375953674, 'timestamp': '2025-09-10 03:04:34.196187', 'step': 22194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:34.249372', 'step': 22194, 'epoch': 3} {'type': 'loss', 'content': 0.03210362046957016, 'timestamp': '2025-09-10 03:04:34.251603', 'step': 22195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:34.304413', 'step': 22195, 'epoch': 3} {'type': 'loss', 'content': 0.11710619926452637, 'timestamp': '2025-09-10 03:04:34.310489', 'step': 22196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:34.364720', 'step': 22196, 'epoch': 3} {'type': 'loss', 'content': 0.02693367376923561, 'timestamp': '2025-09-10 03:04:34.366993', 'step': 22197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:34.440357', 'step': 22197, 'epoch': 3} {'type': 'loss', 'content': 0.12413494288921356, 'timestamp': '2025-09-10 03:04:34.442590', 'step': 22198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:34.496430', 'step': 22198, 'epoch': 3} {'type': 'loss', 'content': 0.043864596635103226, 'timestamp': '2025-09-10 03:04:34.498691', 'step': 22199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:34.551625', 'step': 22199, 'epoch': 3} {'type': 'loss', 'content': 0.06473036855459213, 'timestamp': '2025-09-10 03:04:34.557715', 'step': 22200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:34.610529', 'step': 22200, 'epoch': 3} {'type': 'loss', 'content': 0.17390704154968262, 'timestamp': '2025-09-10 03:04:34.612735', 'step': 22201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:34.666925', 'step': 22201, 'epoch': 3} {'type': 'loss', 'content': 0.046672847121953964, 'timestamp': '2025-09-10 03:04:34.669512', 'step': 22202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:34.723452', 'step': 22202, 'epoch': 3} {'type': 'loss', 'content': 0.08363133668899536, 'timestamp': '2025-09-10 03:04:34.725849', 'step': 22203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:34.779814', 'step': 22203, 'epoch': 3} {'type': 'loss', 'content': 0.0021275347098708153, 'timestamp': '2025-09-10 03:04:34.785802', 'step': 22204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:34.838680', 'step': 22204, 'epoch': 3} {'type': 'loss', 'content': 0.0325135737657547, 'timestamp': '2025-09-10 03:04:34.840794', 'step': 22205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:04:34.897571', 'step': 22205, 'epoch': 3} {'type': 'loss', 'content': 0.08608510345220566, 'timestamp': '2025-09-10 03:04:34.899762', 'step': 22206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:34.954321', 'step': 22206, 'epoch': 3} {'type': 'loss', 'content': 0.06782889366149902, 'timestamp': '2025-09-10 03:04:34.956589', 'step': 22207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:35.010055', 'step': 22207, 'epoch': 3} {'type': 'loss', 'content': 0.06691545248031616, 'timestamp': '2025-09-10 03:04:35.016364', 'step': 22208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:35.069326', 'step': 22208, 'epoch': 3} {'type': 'loss', 'content': 0.06942593306303024, 'timestamp': '2025-09-10 03:04:35.071516', 'step': 22209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:35.124438', 'step': 22209, 'epoch': 3} {'type': 'loss', 'content': 0.11189083009958267, 'timestamp': '2025-09-10 03:04:35.126846', 'step': 22210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:35.180305', 'step': 22210, 'epoch': 3} {'type': 'loss', 'content': 0.05577390268445015, 'timestamp': '2025-09-10 03:04:35.182689', 'step': 22211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:35.236061', 'step': 22211, 'epoch': 3} {'type': 'loss', 'content': 0.07007772475481033, 'timestamp': '2025-09-10 03:04:35.242350', 'step': 22212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:35.295442', 'step': 22212, 'epoch': 3} {'type': 'loss', 'content': 0.07939842343330383, 'timestamp': '2025-09-10 03:04:35.297746', 'step': 22213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:35.351527', 'step': 22213, 'epoch': 3} {'type': 'loss', 'content': 0.08085393905639648, 'timestamp': '2025-09-10 03:04:35.353805', 'step': 22214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:35.408222', 'step': 22214, 'epoch': 3} {'type': 'loss', 'content': 0.06625183671712875, 'timestamp': '2025-09-10 03:04:35.410618', 'step': 22215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:35.465118', 'step': 22215, 'epoch': 3} {'type': 'loss', 'content': 0.05654820054769516, 'timestamp': '2025-09-10 03:04:35.471836', 'step': 22216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:35.525305', 'step': 22216, 'epoch': 3} {'type': 'loss', 'content': 0.1062367632985115, 'timestamp': '2025-09-10 03:04:35.527652', 'step': 22217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:35.581345', 'step': 22217, 'epoch': 3} {'type': 'loss', 'content': 0.12417379766702652, 'timestamp': '2025-09-10 03:04:35.584195', 'step': 22218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:35.637886', 'step': 22218, 'epoch': 3} {'type': 'loss', 'content': 0.0405392162501812, 'timestamp': '2025-09-10 03:04:35.640184', 'step': 22219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:35.694003', 'step': 22219, 'epoch': 3} {'type': 'loss', 'content': 0.03717867657542229, 'timestamp': '2025-09-10 03:04:35.700250', 'step': 22220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:35.753798', 'step': 22220, 'epoch': 3} {'type': 'loss', 'content': 0.08263109624385834, 'timestamp': '2025-09-10 03:04:35.756063', 'step': 22221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:35.810047', 'step': 22221, 'epoch': 3} {'type': 'loss', 'content': 0.11538722366094589, 'timestamp': '2025-09-10 03:04:35.812416', 'step': 22222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:35.866383', 'step': 22222, 'epoch': 3} {'type': 'loss', 'content': 0.09278199076652527, 'timestamp': '2025-09-10 03:04:35.868657', 'step': 22223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:35.924068', 'step': 22223, 'epoch': 3} {'type': 'loss', 'content': 0.017440538853406906, 'timestamp': '2025-09-10 03:04:35.930480', 'step': 22224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:35.985521', 'step': 22224, 'epoch': 3} {'type': 'loss', 'content': 0.04228361323475838, 'timestamp': '2025-09-10 03:04:35.987793', 'step': 22225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:36.043386', 'step': 22225, 'epoch': 3} {'type': 'loss', 'content': 0.08462078124284744, 'timestamp': '2025-09-10 03:04:36.045719', 'step': 22226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:36.101170', 'step': 22226, 'epoch': 3} {'type': 'loss', 'content': 0.0771181583404541, 'timestamp': '2025-09-10 03:04:36.103513', 'step': 22227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:36.158413', 'step': 22227, 'epoch': 3} {'type': 'loss', 'content': 0.07183738797903061, 'timestamp': '2025-09-10 03:04:36.164681', 'step': 22228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:36.218166', 'step': 22228, 'epoch': 3} {'type': 'loss', 'content': 0.05782429501414299, 'timestamp': '2025-09-10 03:04:36.220474', 'step': 22229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:36.274012', 'step': 22229, 'epoch': 3} {'type': 'loss', 'content': 0.12451577186584473, 'timestamp': '2025-09-10 03:04:36.276680', 'step': 22230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:36.330858', 'step': 22230, 'epoch': 3} {'type': 'loss', 'content': 0.07571438699960709, 'timestamp': '2025-09-10 03:04:36.333300', 'step': 22231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:36.386931', 'step': 22231, 'epoch': 3} {'type': 'loss', 'content': 0.11947709321975708, 'timestamp': '2025-09-10 03:04:36.393378', 'step': 22232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:04:36.448313', 'step': 22232, 'epoch': 3} {'type': 'loss', 'content': 0.11035267263650894, 'timestamp': '2025-09-10 03:04:36.450656', 'step': 22233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:36.505953', 'step': 22233, 'epoch': 3} {'type': 'loss', 'content': 0.08632344752550125, 'timestamp': '2025-09-10 03:04:36.508507', 'step': 22234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:36.563256', 'step': 22234, 'epoch': 3} {'type': 'loss', 'content': 0.06666671484708786, 'timestamp': '2025-09-10 03:04:36.566774', 'step': 22235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:36.623974', 'step': 22235, 'epoch': 3} {'type': 'loss', 'content': 0.06583983451128006, 'timestamp': '2025-09-10 03:04:36.630455', 'step': 22236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:36.683878', 'step': 22236, 'epoch': 3} {'type': 'loss', 'content': 0.10241123288869858, 'timestamp': '2025-09-10 03:04:36.686367', 'step': 22237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:36.739344', 'step': 22237, 'epoch': 3} {'type': 'loss', 'content': 0.09312978386878967, 'timestamp': '2025-09-10 03:04:36.742384', 'step': 22238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:36.795798', 'step': 22238, 'epoch': 3} {'type': 'loss', 'content': 0.0493854396045208, 'timestamp': '2025-09-10 03:04:36.797893', 'step': 22239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:36.851695', 'step': 22239, 'epoch': 3} {'type': 'loss', 'content': 0.09965091943740845, 'timestamp': '2025-09-10 03:04:36.857963', 'step': 22240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:36.914064', 'step': 22240, 'epoch': 3} {'type': 'loss', 'content': 0.19942264258861542, 'timestamp': '2025-09-10 03:04:36.916412', 'step': 22241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:36.969799', 'step': 22241, 'epoch': 3} {'type': 'loss', 'content': 0.08929198980331421, 'timestamp': '2025-09-10 03:04:36.972114', 'step': 22242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:37.027103', 'step': 22242, 'epoch': 3} {'type': 'loss', 'content': 0.07732470333576202, 'timestamp': '2025-09-10 03:04:37.029552', 'step': 22243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:37.083525', 'step': 22243, 'epoch': 3} {'type': 'loss', 'content': 0.03286664932966232, 'timestamp': '2025-09-10 03:04:37.091435', 'step': 22244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:37.167875', 'step': 22244, 'epoch': 3} {'type': 'loss', 'content': 0.06159795820713043, 'timestamp': '2025-09-10 03:04:37.169986', 'step': 22245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:37.225408', 'step': 22245, 'epoch': 3} {'type': 'loss', 'content': 0.06375284492969513, 'timestamp': '2025-09-10 03:04:37.227709', 'step': 22246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:37.280806', 'step': 22246, 'epoch': 3} {'type': 'loss', 'content': 0.07973627746105194, 'timestamp': '2025-09-10 03:04:37.282903', 'step': 22247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:37.339063', 'step': 22247, 'epoch': 3} {'type': 'loss', 'content': 0.07306331396102905, 'timestamp': '2025-09-10 03:04:37.345131', 'step': 22248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:37.398015', 'step': 22248, 'epoch': 3} {'type': 'loss', 'content': 0.09558163583278656, 'timestamp': '2025-09-10 03:04:37.402798', 'step': 22249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:37.460537', 'step': 22249, 'epoch': 3} {'type': 'loss', 'content': 0.14150531589984894, 'timestamp': '2025-09-10 03:04:37.462698', 'step': 22250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:37.529367', 'step': 22250, 'epoch': 3} {'type': 'loss', 'content': 0.04083821922540665, 'timestamp': '2025-09-10 03:04:37.531716', 'step': 22251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:37.587546', 'step': 22251, 'epoch': 3} {'type': 'loss', 'content': 0.09460556507110596, 'timestamp': '2025-09-10 03:04:37.593849', 'step': 22252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:37.652164', 'step': 22252, 'epoch': 3} {'type': 'loss', 'content': 0.07425212860107422, 'timestamp': '2025-09-10 03:04:37.654447', 'step': 22253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:37.714323', 'step': 22253, 'epoch': 3} {'type': 'loss', 'content': 0.04302569478750229, 'timestamp': '2025-09-10 03:04:37.721689', 'step': 22254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:37.776089', 'step': 22254, 'epoch': 3} {'type': 'loss', 'content': 0.0886361226439476, 'timestamp': '2025-09-10 03:04:37.778310', 'step': 22255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:37.832831', 'step': 22255, 'epoch': 3} {'type': 'loss', 'content': 0.11080694198608398, 'timestamp': '2025-09-10 03:04:37.840852', 'step': 22256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:37.893677', 'step': 22256, 'epoch': 3} {'type': 'loss', 'content': 0.13118278980255127, 'timestamp': '2025-09-10 03:04:37.897185', 'step': 22257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:37.956356', 'step': 22257, 'epoch': 3} {'type': 'loss', 'content': 0.034695953130722046, 'timestamp': '2025-09-10 03:04:37.958479', 'step': 22258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:04:38.012426', 'step': 22258, 'epoch': 3} {'type': 'loss', 'content': 0.09537989646196365, 'timestamp': '2025-09-10 03:04:38.014761', 'step': 22259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:38.068119', 'step': 22259, 'epoch': 3} {'type': 'loss', 'content': 0.08755522221326828, 'timestamp': '2025-09-10 03:04:38.078327', 'step': 22260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:38.133177', 'step': 22260, 'epoch': 3} {'type': 'loss', 'content': 0.10536690056324005, 'timestamp': '2025-09-10 03:04:38.135329', 'step': 22261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:38.190772', 'step': 22261, 'epoch': 3} {'type': 'loss', 'content': 0.10365254431962967, 'timestamp': '2025-09-10 03:04:38.192955', 'step': 22262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:38.246367', 'step': 22262, 'epoch': 3} {'type': 'loss', 'content': 0.0341079942882061, 'timestamp': '2025-09-10 03:04:38.248575', 'step': 22263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:38.302302', 'step': 22263, 'epoch': 3} {'type': 'loss', 'content': 0.0968584269285202, 'timestamp': '2025-09-10 03:04:38.308494', 'step': 22264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:38.361130', 'step': 22264, 'epoch': 3} {'type': 'loss', 'content': 0.09914533793926239, 'timestamp': '2025-09-10 03:04:38.364801', 'step': 22265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:38.424002', 'step': 22265, 'epoch': 3} {'type': 'loss', 'content': 0.05434124544262886, 'timestamp': '2025-09-10 03:04:38.426094', 'step': 22266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:38.479512', 'step': 22266, 'epoch': 3} {'type': 'loss', 'content': 0.0518290214240551, 'timestamp': '2025-09-10 03:04:38.481638', 'step': 22267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:38.535627', 'step': 22267, 'epoch': 3} {'type': 'loss', 'content': 0.07509955018758774, 'timestamp': '2025-09-10 03:04:38.542050', 'step': 22268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:38.595098', 'step': 22268, 'epoch': 3} {'type': 'loss', 'content': 0.02148197591304779, 'timestamp': '2025-09-10 03:04:38.597482', 'step': 22269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:38.653465', 'step': 22269, 'epoch': 3} {'type': 'loss', 'content': 0.07086925208568573, 'timestamp': '2025-09-10 03:04:38.655911', 'step': 22270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:38.710355', 'step': 22270, 'epoch': 3} {'type': 'loss', 'content': 0.06900747865438461, 'timestamp': '2025-09-10 03:04:38.712787', 'step': 22271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:38.767393', 'step': 22271, 'epoch': 3} {'type': 'loss', 'content': 0.12819108366966248, 'timestamp': '2025-09-10 03:04:38.773886', 'step': 22272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:38.827331', 'step': 22272, 'epoch': 3} {'type': 'loss', 'content': 0.09259481728076935, 'timestamp': '2025-09-10 03:04:38.831071', 'step': 22273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:38.887205', 'step': 22273, 'epoch': 3} {'type': 'loss', 'content': 0.06297567486763, 'timestamp': '2025-09-10 03:04:38.889480', 'step': 22274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:38.943540', 'step': 22274, 'epoch': 3} {'type': 'loss', 'content': 0.1395280510187149, 'timestamp': '2025-09-10 03:04:38.945737', 'step': 22275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:04:38.999573', 'step': 22275, 'epoch': 3} {'type': 'loss', 'content': 0.08203613758087158, 'timestamp': '2025-09-10 03:04:39.005730', 'step': 22276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:39.059512', 'step': 22276, 'epoch': 3} {'type': 'loss', 'content': 0.13802972435951233, 'timestamp': '2025-09-10 03:04:39.061570', 'step': 22277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:39.115639', 'step': 22277, 'epoch': 3} {'type': 'loss', 'content': 0.05147431790828705, 'timestamp': '2025-09-10 03:04:39.117804', 'step': 22278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:39.172230', 'step': 22278, 'epoch': 3} {'type': 'loss', 'content': 0.1060284674167633, 'timestamp': '2025-09-10 03:04:39.174412', 'step': 22279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:39.229673', 'step': 22279, 'epoch': 3} {'type': 'loss', 'content': 0.06411230564117432, 'timestamp': '2025-09-10 03:04:39.236609', 'step': 22280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:39.290751', 'step': 22280, 'epoch': 3} {'type': 'loss', 'content': 0.11213436722755432, 'timestamp': '2025-09-10 03:04:39.292949', 'step': 22281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:39.347185', 'step': 22281, 'epoch': 3} {'type': 'loss', 'content': 0.08091965317726135, 'timestamp': '2025-09-10 03:04:39.349405', 'step': 22282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:39.407853', 'step': 22282, 'epoch': 3} {'type': 'loss', 'content': 0.03197488933801651, 'timestamp': '2025-09-10 03:04:39.410072', 'step': 22283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:39.466599', 'step': 22283, 'epoch': 3} {'type': 'loss', 'content': 0.06229199469089508, 'timestamp': '2025-09-10 03:04:39.473544', 'step': 22284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:39.529312', 'step': 22284, 'epoch': 3} {'type': 'loss', 'content': 0.1190049946308136, 'timestamp': '2025-09-10 03:04:39.531712', 'step': 22285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:39.588458', 'step': 22285, 'epoch': 3} {'type': 'loss', 'content': 0.03430522605776787, 'timestamp': '2025-09-10 03:04:39.590780', 'step': 22286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:39.645697', 'step': 22286, 'epoch': 3} {'type': 'loss', 'content': 0.010545658878982067, 'timestamp': '2025-09-10 03:04:39.647960', 'step': 22287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:39.703531', 'step': 22287, 'epoch': 3} {'type': 'loss', 'content': 0.13783778250217438, 'timestamp': '2025-09-10 03:04:39.710309', 'step': 22288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:39.765243', 'step': 22288, 'epoch': 3} {'type': 'loss', 'content': 0.07738999277353287, 'timestamp': '2025-09-10 03:04:39.767683', 'step': 22289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:39.824100', 'step': 22289, 'epoch': 3} {'type': 'loss', 'content': 0.10183220356702805, 'timestamp': '2025-09-10 03:04:39.826282', 'step': 22290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 03:04:39.880821', 'step': 22290, 'epoch': 3} {'type': 'loss', 'content': 0.04439953342080116, 'timestamp': '2025-09-10 03:04:39.882901', 'step': 22291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:39.940377', 'step': 22291, 'epoch': 3} {'type': 'loss', 'content': 0.13183705508708954, 'timestamp': '2025-09-10 03:04:39.946741', 'step': 22292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:40.000243', 'step': 22292, 'epoch': 3} {'type': 'loss', 'content': 0.10401801764965057, 'timestamp': '2025-09-10 03:04:40.002448', 'step': 22293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:40.062705', 'step': 22293, 'epoch': 3} {'type': 'loss', 'content': 0.0881340503692627, 'timestamp': '2025-09-10 03:04:40.064928', 'step': 22294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:40.122505', 'step': 22294, 'epoch': 3} {'type': 'loss', 'content': 0.0833597406744957, 'timestamp': '2025-09-10 03:04:40.124693', 'step': 22295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:40.185100', 'step': 22295, 'epoch': 3} {'type': 'loss', 'content': 0.11374898254871368, 'timestamp': '2025-09-10 03:04:40.191475', 'step': 22296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:40.244670', 'step': 22296, 'epoch': 3} {'type': 'loss', 'content': 0.08893217891454697, 'timestamp': '2025-09-10 03:04:40.246929', 'step': 22297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:40.301963', 'step': 22297, 'epoch': 3} {'type': 'loss', 'content': 0.054543305188417435, 'timestamp': '2025-09-10 03:04:40.304210', 'step': 22298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:40.365202', 'step': 22298, 'epoch': 3} {'type': 'loss', 'content': 0.029574379324913025, 'timestamp': '2025-09-10 03:04:40.367505', 'step': 22299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:40.427787', 'step': 22299, 'epoch': 3} {'type': 'loss', 'content': 0.09761291742324829, 'timestamp': '2025-09-10 03:04:40.434003', 'step': 22300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:40.493251', 'step': 22300, 'epoch': 3} {'type': 'loss', 'content': 0.153840571641922, 'timestamp': '2025-09-10 03:04:40.495546', 'step': 22301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:40.549420', 'step': 22301, 'epoch': 3} {'type': 'loss', 'content': 0.08094365149736404, 'timestamp': '2025-09-10 03:04:40.551759', 'step': 22302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:40.605942', 'step': 22302, 'epoch': 3} {'type': 'loss', 'content': 0.10451769828796387, 'timestamp': '2025-09-10 03:04:40.608015', 'step': 22303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:40.662341', 'step': 22303, 'epoch': 3} {'type': 'loss', 'content': 0.07930482923984528, 'timestamp': '2025-09-10 03:04:40.668302', 'step': 22304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:40.721480', 'step': 22304, 'epoch': 3} {'type': 'loss', 'content': 0.16636653244495392, 'timestamp': '2025-09-10 03:04:40.723205', 'step': 22305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:40.776829', 'step': 22305, 'epoch': 3} {'type': 'loss', 'content': 0.057979609817266464, 'timestamp': '2025-09-10 03:04:40.778854', 'step': 22306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:40.834313', 'step': 22306, 'epoch': 3} {'type': 'loss', 'content': 0.010546495206654072, 'timestamp': '2025-09-10 03:04:40.836579', 'step': 22307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:40.891356', 'step': 22307, 'epoch': 3} {'type': 'loss', 'content': 0.04786304011940956, 'timestamp': '2025-09-10 03:04:40.897602', 'step': 22308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:40.951349', 'step': 22308, 'epoch': 3} {'type': 'loss', 'content': 0.09856025129556656, 'timestamp': '2025-09-10 03:04:40.953414', 'step': 22309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:41.007728', 'step': 22309, 'epoch': 3} {'type': 'loss', 'content': 0.04307306185364723, 'timestamp': '2025-09-10 03:04:41.009887', 'step': 22310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:41.063708', 'step': 22310, 'epoch': 3} {'type': 'loss', 'content': 0.0866733267903328, 'timestamp': '2025-09-10 03:04:41.065967', 'step': 22311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:41.123342', 'step': 22311, 'epoch': 3} {'type': 'loss', 'content': 0.11236636340618134, 'timestamp': '2025-09-10 03:04:41.129248', 'step': 22312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:41.200465', 'step': 22312, 'epoch': 3} {'type': 'loss', 'content': 0.12828026711940765, 'timestamp': '2025-09-10 03:04:41.202227', 'step': 22313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:41.260184', 'step': 22313, 'epoch': 3} {'type': 'loss', 'content': 0.11490768939256668, 'timestamp': '2025-09-10 03:04:41.262016', 'step': 22314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:41.320375', 'step': 22314, 'epoch': 3} {'type': 'loss', 'content': 0.06843261420726776, 'timestamp': '2025-09-10 03:04:41.322164', 'step': 22315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:41.382120', 'step': 22315, 'epoch': 3} {'type': 'loss', 'content': 0.054672326892614365, 'timestamp': '2025-09-10 03:04:41.388419', 'step': 22316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:04:41.442724', 'step': 22316, 'epoch': 3} {'type': 'loss', 'content': 0.07020711153745651, 'timestamp': '2025-09-10 03:04:41.445119', 'step': 22317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:41.501908', 'step': 22317, 'epoch': 3} {'type': 'loss', 'content': 0.042726386338472366, 'timestamp': '2025-09-10 03:04:41.504300', 'step': 22318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:41.559879', 'step': 22318, 'epoch': 3} {'type': 'loss', 'content': 0.0638953447341919, 'timestamp': '2025-09-10 03:04:41.562099', 'step': 22319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:41.616821', 'step': 22319, 'epoch': 3} {'type': 'loss', 'content': 0.030241111293435097, 'timestamp': '2025-09-10 03:04:41.623057', 'step': 22320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:41.676516', 'step': 22320, 'epoch': 3} {'type': 'loss', 'content': 0.15127494931221008, 'timestamp': '2025-09-10 03:04:41.678417', 'step': 22321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:41.732231', 'step': 22321, 'epoch': 3} {'type': 'loss', 'content': 0.16860605776309967, 'timestamp': '2025-09-10 03:04:41.734126', 'step': 22322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:41.787969', 'step': 22322, 'epoch': 3} {'type': 'loss', 'content': 0.12286500632762909, 'timestamp': '2025-09-10 03:04:41.789766', 'step': 22323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:41.844571', 'step': 22323, 'epoch': 3} {'type': 'loss', 'content': 0.07412069290876389, 'timestamp': '2025-09-10 03:04:41.850845', 'step': 22324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:41.906392', 'step': 22324, 'epoch': 3} {'type': 'loss', 'content': 0.049514904618263245, 'timestamp': '2025-09-10 03:04:41.908545', 'step': 22325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:41.963497', 'step': 22325, 'epoch': 3} {'type': 'loss', 'content': 0.07382985204458237, 'timestamp': '2025-09-10 03:04:41.965696', 'step': 22326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:42.020452', 'step': 22326, 'epoch': 3} {'type': 'loss', 'content': 0.08653914928436279, 'timestamp': '2025-09-10 03:04:42.022610', 'step': 22327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:42.076396', 'step': 22327, 'epoch': 3} {'type': 'loss', 'content': 0.22410160303115845, 'timestamp': '2025-09-10 03:04:42.082673', 'step': 22328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:42.140090', 'step': 22328, 'epoch': 3} {'type': 'loss', 'content': 0.1466473788022995, 'timestamp': '2025-09-10 03:04:42.142117', 'step': 22329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:42.200732', 'step': 22329, 'epoch': 3} {'type': 'loss', 'content': 0.04053746536374092, 'timestamp': '2025-09-10 03:04:42.202705', 'step': 22330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:42.264060', 'step': 22330, 'epoch': 3} {'type': 'loss', 'content': 0.07131993025541306, 'timestamp': '2025-09-10 03:04:42.266283', 'step': 22331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:42.320290', 'step': 22331, 'epoch': 3} {'type': 'loss', 'content': 0.07742615044116974, 'timestamp': '2025-09-10 03:04:42.326462', 'step': 22332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:04:42.381739', 'step': 22332, 'epoch': 3} {'type': 'loss', 'content': 0.09027045965194702, 'timestamp': '2025-09-10 03:04:42.384027', 'step': 22333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:42.440777', 'step': 22333, 'epoch': 3} {'type': 'loss', 'content': 0.03764047846198082, 'timestamp': '2025-09-10 03:04:42.443007', 'step': 22334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:42.498672', 'step': 22334, 'epoch': 3} {'type': 'loss', 'content': 0.03881644830107689, 'timestamp': '2025-09-10 03:04:42.501387', 'step': 22335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:42.555445', 'step': 22335, 'epoch': 3} {'type': 'loss', 'content': 0.03070109337568283, 'timestamp': '2025-09-10 03:04:42.561347', 'step': 22336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:04:42.615240', 'step': 22336, 'epoch': 3} {'type': 'loss', 'content': 0.12959781289100647, 'timestamp': '2025-09-10 03:04:42.617168', 'step': 22337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:42.670678', 'step': 22337, 'epoch': 3} {'type': 'loss', 'content': 0.03096180036664009, 'timestamp': '2025-09-10 03:04:42.672409', 'step': 22338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:42.725935', 'step': 22338, 'epoch': 3} {'type': 'loss', 'content': 0.12069212645292282, 'timestamp': '2025-09-10 03:04:42.728075', 'step': 22339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:42.781562', 'step': 22339, 'epoch': 3} {'type': 'loss', 'content': 0.0772520899772644, 'timestamp': '2025-09-10 03:04:42.787676', 'step': 22340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:42.844681', 'step': 22340, 'epoch': 3} {'type': 'loss', 'content': 0.04310956224799156, 'timestamp': '2025-09-10 03:04:42.846911', 'step': 22341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:42.901422', 'step': 22341, 'epoch': 3} {'type': 'loss', 'content': 0.08203423023223877, 'timestamp': '2025-09-10 03:04:42.903651', 'step': 22342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:42.958975', 'step': 22342, 'epoch': 3} {'type': 'loss', 'content': 0.0760253369808197, 'timestamp': '2025-09-10 03:04:42.961200', 'step': 22343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:43.015812', 'step': 22343, 'epoch': 3} {'type': 'loss', 'content': 0.07372106611728668, 'timestamp': '2025-09-10 03:04:43.021937', 'step': 22344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:43.075192', 'step': 22344, 'epoch': 3} {'type': 'loss', 'content': 0.11688488721847534, 'timestamp': '2025-09-10 03:04:43.077363', 'step': 22345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:43.130860', 'step': 22345, 'epoch': 3} {'type': 'loss', 'content': 0.03819170966744423, 'timestamp': '2025-09-10 03:04:43.132719', 'step': 22346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:43.186751', 'step': 22346, 'epoch': 3} {'type': 'loss', 'content': 0.05160882696509361, 'timestamp': '2025-09-10 03:04:43.188684', 'step': 22347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:43.242509', 'step': 22347, 'epoch': 3} {'type': 'loss', 'content': 0.07025784999132156, 'timestamp': '2025-09-10 03:04:43.248255', 'step': 22348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:04:43.311116', 'step': 22348, 'epoch': 3} {'type': 'loss', 'content': 0.12092030793428421, 'timestamp': '2025-09-10 03:04:43.313214', 'step': 22349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:43.369733', 'step': 22349, 'epoch': 3} {'type': 'loss', 'content': 0.08189476281404495, 'timestamp': '2025-09-10 03:04:43.371906', 'step': 22350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:43.425233', 'step': 22350, 'epoch': 3} {'type': 'loss', 'content': 0.10211649537086487, 'timestamp': '2025-09-10 03:04:43.427473', 'step': 22351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:43.482049', 'step': 22351, 'epoch': 3} {'type': 'loss', 'content': 0.13184790313243866, 'timestamp': '2025-09-10 03:04:43.488162', 'step': 22352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:43.540736', 'step': 22352, 'epoch': 3} {'type': 'loss', 'content': 0.06377191841602325, 'timestamp': '2025-09-10 03:04:43.542594', 'step': 22353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 03:04:43.595393', 'step': 22353, 'epoch': 3} {'type': 'loss', 'content': 0.040507737547159195, 'timestamp': '2025-09-10 03:04:43.597381', 'step': 22354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:43.651129', 'step': 22354, 'epoch': 3} {'type': 'loss', 'content': 0.06982258707284927, 'timestamp': '2025-09-10 03:04:43.652829', 'step': 22355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:43.707050', 'step': 22355, 'epoch': 3} {'type': 'loss', 'content': 0.0667869970202446, 'timestamp': '2025-09-10 03:04:43.712548', 'step': 22356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:43.775816', 'step': 22356, 'epoch': 3} {'type': 'loss', 'content': 0.11839429289102554, 'timestamp': '2025-09-10 03:04:43.777923', 'step': 22357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:43.831354', 'step': 22357, 'epoch': 3} {'type': 'loss', 'content': 0.0161440446972847, 'timestamp': '2025-09-10 03:04:43.834493', 'step': 22358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:43.888215', 'step': 22358, 'epoch': 3} {'type': 'loss', 'content': 0.07548608630895615, 'timestamp': '2025-09-10 03:04:43.894445', 'step': 22359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:43.950397', 'step': 22359, 'epoch': 3} {'type': 'loss', 'content': 0.16074560582637787, 'timestamp': '2025-09-10 03:04:43.956385', 'step': 22360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:44.012498', 'step': 22360, 'epoch': 3} {'type': 'loss', 'content': 0.0896759182214737, 'timestamp': '2025-09-10 03:04:44.014528', 'step': 22361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:44.069731', 'step': 22361, 'epoch': 3} {'type': 'loss', 'content': 0.08173061907291412, 'timestamp': '2025-09-10 03:04:44.071485', 'step': 22362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 03:04:44.125135', 'step': 22362, 'epoch': 3} {'type': 'loss', 'content': 0.1561467945575714, 'timestamp': '2025-09-10 03:04:44.136116', 'step': 22363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:44.195211', 'step': 22363, 'epoch': 3} {'type': 'loss', 'content': 0.03254501894116402, 'timestamp': '2025-09-10 03:04:44.201006', 'step': 22364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:44.255264', 'step': 22364, 'epoch': 3} {'type': 'loss', 'content': 0.031502433121204376, 'timestamp': '2025-09-10 03:04:44.264439', 'step': 22365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:44.320858', 'step': 22365, 'epoch': 3} {'type': 'loss', 'content': 0.046746157109737396, 'timestamp': '2025-09-10 03:04:44.323063', 'step': 22366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:44.380078', 'step': 22366, 'epoch': 3} {'type': 'loss', 'content': 0.18980957567691803, 'timestamp': '2025-09-10 03:04:44.382295', 'step': 22367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:44.435690', 'step': 22367, 'epoch': 3} {'type': 'loss', 'content': 0.16729430854320526, 'timestamp': '2025-09-10 03:04:44.441701', 'step': 22368, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 03:04:57.351856', 'step': 22368, 'epoch': 3} {'type': 'pplx', 'content': 7679.553291350418, 'timestamp': '2025-09-10 03:04:57.354917', 'step': 22368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 03:04:57.409417', 'step': 22368, 'epoch': 3} {'type': 'loss', 'content': 0.0848972350358963, 'timestamp': '2025-09-10 03:04:57.411762', 'step': 22369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:57.466609', 'step': 22369, 'epoch': 3} {'type': 'loss', 'content': 0.06463699787855148, 'timestamp': '2025-09-10 03:04:57.468758', 'step': 22370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:57.522702', 'step': 22370, 'epoch': 3} {'type': 'loss', 'content': 0.07106613367795944, 'timestamp': '2025-09-10 03:04:57.524721', 'step': 22371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:57.578945', 'step': 22371, 'epoch': 3} {'type': 'loss', 'content': 0.029243767261505127, 'timestamp': '2025-09-10 03:04:57.585262', 'step': 22372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:57.638406', 'step': 22372, 'epoch': 3} {'type': 'loss', 'content': 0.06953317672014236, 'timestamp': '2025-09-10 03:04:57.640612', 'step': 22373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:57.694276', 'step': 22373, 'epoch': 3} {'type': 'loss', 'content': 0.033022526651620865, 'timestamp': '2025-09-10 03:04:57.696488', 'step': 22374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-10 03:04:57.750029', 'step': 22374, 'epoch': 3} {'type': 'loss', 'content': 0.2374916672706604, 'timestamp': '2025-09-10 03:04:57.752325', 'step': 22375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 03:04:57.806003', 'step': 22375, 'epoch': 3} {'type': 'loss', 'content': 0.0775311216711998, 'timestamp': '2025-09-10 03:04:57.812027', 'step': 22376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:57.865106', 'step': 22376, 'epoch': 3} {'type': 'loss', 'content': 0.0737917348742485, 'timestamp': '2025-09-10 03:04:57.867340', 'step': 22377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-10 03:04:57.921584', 'step': 22377, 'epoch': 3} {'type': 'loss', 'content': 0.05450873449444771, 'timestamp': '2025-09-10 03:04:57.923767', 'step': 22378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-10 03:04:57.977856', 'step': 22378, 'epoch': 3} {'type': 'loss', 'content': 0.04381970688700676, 'timestamp': '2025-09-10 03:04:57.980163', 'step': 22379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 208], 'flops': 1040006410960.0}, 'timestamp': '2025-09-10 03:04:58.034071', 'step': 22379, 'epoch': 3} {'type': 'loss', 'content': 0.0014721325132995844, 'timestamp': '2025-09-10 03:04:58.040108', 'step': 22380, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-10 03:05:10.909059', 'step': 22380, 'epoch': 3} {'type': 'pplx', 'content': 7837.6860222066925, 'timestamp': '2025-09-10 03:05:10.912309', 'step': 22380, 'epoch': 3} {'type': 'best_pplx', 'content': 7679.553291350418, 'timestamp': '2025-09-10 03:05:10.913822', 'step': 22380, 'epoch': 3} {'type': 'best_step', 'content': 22368, 'timestamp': '2025-09-10 03:05:10.915246', 'step': 22380, 'epoch': 3} {'type': 'total_pplx_flops', 'content': 50797850157497600, 'timestamp': '2025-09-10 03:05:10.916403', 'step': 22380, 'epoch': 3} {'type': 'total_train_flops', 'content': 5.161879486776446e+16, 'timestamp': '2025-09-10 03:05:10.918099', 'step': 22380, 'epoch': 3}